In this project, our goal was to predict stock prices using a machine learning approach. To achieve this, we designed and implemented a model based on a set of carefully chosen features. These features included technical indicators such as Relative Strength Index (RSI), Money Flow Index (MFI), Exponential Moving Averages (EMA), Simple Moving Average (SMA), and Moving Average Convergence Divergence (MACD), as well as historical price data encompassing the previous 1 day, 3 days, 5 days, and 1, 2, 3, 4 weeks. Additionally, rolling average values for high, low, open, close, adjusted close, and volume were incorporated.
import os
import time
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
pd.set_option('display.max_columns', None)
# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Mute sklearn warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)
# Show charts when running kernel
#init_notebook_mode(connected=True)
# Change default background color for all visualizations.
# Build a transparent-paper / light-grey-plot layout once, turn it into a
# reusable Plotly template, register it, and make it the session default.
layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
fig = go.Figure(layout=layout)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'
# Silence noisy xgboost UserWarnings first, then mute all remaining warnings
# globally (intentional for notebook readability; hides real issues too).
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="xgboost")
warnings.filterwarnings("ignore")
def evaluate_regression_model(y_true, y_pred):
    """
    Calculate, print, and return evaluation metrics for a regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary containing the evaluation metrics (MSE, RMSE, MAE, R2).
    """
    # Calculate evaluation metrics
    mse = mean_squared_error(y_true, y_pred)
    # Derive RMSE from MSE directly: the `squared=False` keyword was
    # deprecated in scikit-learn 1.4 and removed in 1.6, so this form is
    # version-proof and avoids a second full metric computation.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Print the results rounded to 3 decimals for readability
    print(f'Mean Squared Error (MSE): {np.round(mse,3)}')
    print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
    print(f'Mean Absolute Error (MAE): {np.round(mae,3)}')
    print(f'R-squared (R2): {np.round(r2,3)}')
    # Return the unrounded results as a dictionary
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
    return results
def evaluate_regression_model2(y_true, y_pred):
    """
    Calculate evaluation metrics for a regression model (no printing).

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary containing the evaluation metrics (MSE, RMSE, MAE, R2).
    """
    # Calculate evaluation metrics
    mse = mean_squared_error(y_true, y_pred)
    # Derive RMSE from MSE directly: the `squared=False` keyword was
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Return results as a dictionary
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
    return results
# Returns RSI values
def rsi(df, periods = 14):
    """
    Compute the Relative Strength Index (RSI) from a 'close' price column.

    Parameters:
    - df (DataFrame): Pandas DataFrame with a 'close' column.
    - periods (int): Look-back window for the RSI. Default is 14.

    Returns:
    - Series: RSI values in [0, 100]; the first `periods` entries are NaN.
    """
    deltas = df['close'].diff()
    # Split day-over-day changes into gains and (sign-flipped) losses
    gains = deltas.clip(lower=0)
    losses = -deltas.clip(upper=0)
    # Wilder-style smoothing via exponentially weighted means
    avg_gain = gains.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    avg_loss = losses.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)
def gain(x):
    # Sum of the positive entries; negatives contribute zero via the mask product.
    positive_mask = x > 0
    return (positive_mask * x).sum()
def loss(x):
    # Sum of the negative entries (result is <= 0); positives are masked to zero.
    negative_mask = x < 0
    return (negative_mask * x).sum()
def mfi(df, n=14):
    """
    Calculate the Money Flow Index (MFI) for a given DataFrame.

    Parameters:
    - df (DataFrame): Pandas DataFrame with 'high', 'low', 'close', and 'volume' columns.
    - n (int): Number of periods to use for the MFI calculation. Default is 14.

    Returns:
    - numpy.ndarray: An array containing the MFI values.
    """
    # Typical price is the mean of high, low and close for each bar
    typical = (df['high'] + df['low'] + df['close']) / 3
    raw_money_flow = typical * df['volume']
    # Flag each bar as positive (+1) or negative (-1) money flow depending on
    # whether the typical price rose versus the previous bar (first bar: -1)
    direction = np.where(typical > typical.shift(1), 1, -1)
    directed_flow = raw_money_flow * direction
    # Rolling n-period totals of the inflows and outflows (vectorized)
    inflow = pd.Series(np.where(directed_flow > 0, directed_flow, 0)).rolling(n, min_periods=1).sum()
    outflow = pd.Series(np.where(directed_flow < 0, -directed_flow, 0)).rolling(n, min_periods=1).sum()
    money_ratio = inflow / outflow
    return (100 - 100 / (1 + money_ratio)).to_numpy()
def plot_regression_accuracy(y_true, y_pred):
    """
    Create various plots to evaluate the accuracy of a linear regression model.

    Shows four figures in sequence (each blocks until closed in a script;
    renders inline in a notebook): actual-vs-predicted scatter, residual
    plot, residual distribution, and a perfect-fit overlay.

    Parameters:
    - y_true: Actual values. Must support elementwise subtraction with
      y_pred (e.g. numpy array or pandas Series).
    - y_pred: Predicted values.
    """
    # 1) Scatter of predictions against ground truth; points near the
    # diagonal indicate accurate predictions
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Scatter Plot of Actual vs Predicted Values')
    plt.show()
    # 2) Residuals vs predictions; a flat band around zero is ideal,
    # patterns suggest model bias
    residuals = y_true - y_pred
    plt.scatter(y_pred, residuals)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.title('Residual Plot')
    plt.show()
    # 3) Histogram (with KDE) of the residual distribution; roughly
    # symmetric around zero is a good sign
    sns.histplot(residuals, kde=True)
    plt.xlabel('Residuals')
    plt.ylabel('Frequency')
    plt.title('Distribution of Residuals')
    plt.show()
    # 4) Scatter with the y = x "perfect fit" reference line overlaid
    plt.plot(y_true, y_true, linestyle='--', color='r', label='Perfect Fit')
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Predicted vs Actual Values with Perfect Fit Line')
    plt.legend()
    plt.show()
def plot_predictions(df, prediction, test_start_year=2020):
    """
    Create a Plotly graph to compare actual values with predictions.

    Parameters:
    - df (DataFrame): A pandas DataFrame containing 'date' and 'close_1d_next' columns.
    - prediction (array-like): Predicted values corresponding to the test set
      (rows with date.year >= test_start_year), in row order.
    - test_start_year (int): First calendar year of the test period. Default is 2020.
    """
    # Copy the slice so assigning the prediction column does not trigger a
    # SettingWithCopyWarning / mutate a view of the caller's DataFrame.
    plot_test_df = df[df.date.dt.year >= test_start_year].copy()
    plot_test_df['prediction'] = prediction
    fig = make_subplots(rows=2, cols=1)
    # Top subplot: full-history truth with the test-period prediction overlaid
    fig.add_trace(go.Scatter(x=df.date, y=df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue'), row=1, col=1)
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.prediction,
                             name='Prediction',
                             marker_color='MediumPurple'), row=1, col=1)
    # Add title and Y-axis title for the first subplot
    fig.update_layout(title_text='Train Data and Test Data', title_x=0.5, title_y=0.9)
    fig.update_yaxes(title_text='Prediction', row=1, col=1)
    # Bottom subplot: test period only. Use the test rows' own truth column
    # rather than an undefined global `y_test` (which raised a NameError).
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue',
                             showlegend=False), row=2, col=1)
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.prediction,
                             name='Prediction',
                             marker_color='MediumPurple',
                             showlegend=False), row=2, col=1)
    fig.update_yaxes(title_text='Prediction', row=2, col=1)
    fig.show()
def plot_feature_importance(model, X_train, top_features):
    """
    Plot the feature importance from a linear regression model and return a sorted DataFrame of feature importances.

    Parameters:
    - model: A trained linear regression model with a coef_ attribute.
    - X_train (DataFrame): The DataFrame used to train the model, for feature names.
    - top_features (int): Number of top features to display in the bar chart.

    Returns:
    - DataFrame: All features with their importance, sorted descending
      (not just the plotted top ones).
    """
    # Use absolute coefficient magnitudes as importance scores
    feature_importance_df = pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': np.abs(model.coef_),
    }).sort_values(by='Importance', ascending=False).reset_index(drop=True)
    # Slice the top rows once instead of re-slicing for every plot call
    top_df = feature_importance_df.head(top_features)
    # Plot feature importance as a horizontal bar chart
    plt.figure(figsize=(20, 6))
    plt.barh(range(len(top_df)), top_df['Importance'], align="center")
    plt.yticks(range(len(top_df)), labels=top_df['Feature'])
    plt.ylabel("Features")
    plt.xlabel("Coefficient Magnitude")
    plt.title(f"Top {top_features} Feature Importance Values")
    plt.show()
    return feature_importance_df
# Local directory holding the prepared stock data (machine-specific path)
out_loc = '/Users/isapocan/Desktop/LSU/data/'
# Define the file path for the parquet file
parquet_file_path = out_loc + "stock_1d.parquet"
try:
    # Read the Parquet file into a DataFrame
    df = pd.read_parquet(parquet_file_path)
    # Convert column names to lowercase for consistency
    df.columns = df.columns.str.lower()
    # Display the first few rows of the DataFrame (notebook-only helper)
    display(df.head())
except Exception as e:
    # Broad catch is deliberate here: report the error and let the
    # notebook keep running instead of crashing the kernel.
    print(f"An error occurred while reading the file: {e}")
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-01-02 | 94.190002 | 94.790001 | 93.959999 | 94.779999 | 67.895119 | 3206700.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 1 | 2013-01-03 | 94.339996 | 94.930000 | 94.129997 | 94.669998 | 67.816322 | 2704600.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 2 | 2013-01-04 | 94.790001 | 95.480003 | 94.540001 | 95.370003 | 68.317757 | 2704900.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 3 | 2013-01-07 | 95.019997 | 95.730003 | 94.760002 | 95.489998 | 68.403717 | 2745800.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 4 | 2013-01-08 | 95.169998 | 95.750000 | 95.099998 | 95.500000 | 68.410889 | 2655500.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
# Filter the DataFrame to include only rows where 'symbol' is 'MDLZ'
# (restricts the whole analysis to a single ticker)
df = df[df['symbol']=='MDLZ']
# Display the first few rows and the shape of the filtered DataFrame
display(df.head())
display(df.shape)
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 25.840000 | 26.690001 | 25.780001 | 26.670000 | 21.445908 | 17862400.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852844 | 2013-01-03 | 26.700001 | 26.770000 | 26.490000 | 26.639999 | 21.421791 | 9075500.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852845 | 2013-01-04 | 26.700001 | 26.830000 | 26.549999 | 26.740000 | 21.502203 | 7696000.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852846 | 2013-01-07 | 26.620001 | 26.740000 | 26.549999 | 26.660000 | 21.437866 | 7576200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852847 | 2013-01-08 | 26.520000 | 26.920000 | 26.459999 | 26.680000 | 21.453959 | 14360800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
(2733, 15)
Description: RSI helps you understand if a stock is likely to be overbought (prices too high) or oversold (prices too low). It looks at recent price changes to make this determination.
Description: MFI considers both price and trading volume to identify if a stock is overbought or oversold. It helps gauge the strength of buying and selling pressure.
Description: EMA smooths out price data, giving more weight to recent prices. It reacts faster to price changes compared to a Simple Moving Average (SMA), making it useful for trend analysis.
Description: SMA is a basic average of stock prices over a specific period. It provides a smoothed representation of the overall price trend, helping to identify general market direction.
Description: MACD is a trend-following momentum indicator that shows the relationship between two moving averages of a security's price. It helps identify potential trend reversals or momentum shifts.
Description: The MACD signal line is a nine-day EMA of the MACD. It is used to generate trading signals. When the MACD crosses above the signal line, it might be a signal to buy, and when it crosses below, it might be a signal to sell.
def add_moving_averages(df, column_name, ema_span=9, sma_periods=(5, 10, 15, 30)):
    """
    Adds exponential and simple moving-average columns to the DataFrame in place.

    Each average is shifted by one row so a given day's feature only uses
    data from before that day (avoids look-ahead leakage into the target).

    Parameters:
    - df (DataFrame): The DataFrame to modify.
    - column_name (str): The column name to calculate moving averages for.
    - ema_span (int): Span of the exponential moving average; the new column
      is named 'ema_{ema_span}'. Default is 9.
    - sma_periods (iterable of int): Window sizes for the simple moving
      averages; columns are named 'sma_{period}'. Default is (5, 10, 15, 30).
    """
    # Exponential Moving Average (EMA)
    df[f'ema_{ema_span}'] = df[column_name].ewm(span=ema_span).mean().shift()
    # Simple Moving Averages (SMA) with different periods
    for period in sma_periods:
        df[f'sma_{period}'] = df[column_name].rolling(window=period).mean().shift()
# Add moving averages for the 'close' column
add_moving_averages(df, 'close')
# Inspect dtypes and non-null counts (the new columns start with NaNs)
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2733 entries, 852843 to 855575 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 2733 non-null datetime64[ns] 1 open 2733 non-null float64 2 high 2733 non-null float64 3 low 2733 non-null float64 4 close 2733 non-null float64 5 adj close 2733 non-null float64 6 volume 2733 non-null float64 7 symbol 2733 non-null object 8 security 2733 non-null object 9 gics sector 2733 non-null object 10 gics sub-industry 2733 non-null object 11 headquarters location 2733 non-null object 12 date added 2733 non-null object 13 cik 2733 non-null int64 14 founded 2733 non-null object 15 ema_9 2732 non-null float64 16 sma_5 2728 non-null float64 17 sma_10 2723 non-null float64 18 sma_15 2718 non-null float64 19 sma_30 2703 non-null float64 dtypes: datetime64[ns](1), float64(11), int64(1), object(7) memory usage: 448.4+ KB
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2733 entries, 852843 to 855575 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 2733 non-null datetime64[ns] 1 open 2733 non-null float64 2 high 2733 non-null float64 3 low 2733 non-null float64 4 close 2733 non-null float64 5 adj close 2733 non-null float64 6 volume 2733 non-null float64 7 symbol 2733 non-null object 8 security 2733 non-null object 9 gics sector 2733 non-null object 10 gics sub-industry 2733 non-null object 11 headquarters location 2733 non-null object 12 date added 2733 non-null object 13 cik 2733 non-null int64 14 founded 2733 non-null object 15 ema_9 2732 non-null float64 16 sma_5 2728 non-null float64 17 sma_10 2723 non-null float64 18 sma_15 2718 non-null float64 19 sma_30 2703 non-null float64 dtypes: datetime64[ns](1), float64(11), int64(1), object(7) memory usage: 448.4+ KB
# Add a Relative Strength Index (RSI) column to the DataFrame
try:
    df['rsi'] = rsi(df)  # Uncomment and adjust fillna(0) if appropriate for handling missing values
except Exception as e:
    # Report and continue so a bad column doesn't stop the notebook
    print(f"Error calculating RSI: {e}")
# Add a Money Flow Index (MFI) column to the DataFrame
try:
    df['mfi'] = mfi(df, 14)  # The second argument is the period, here assumed to be 14
except Exception as e:
    print(f"Error calculating MFI: {e}")
# Inspect the engineered indicator columns alongside the closing price
df[['date','close','ema_9','sma_5','sma_10','sma_15','sma_30','rsi','mfi']]
| date | close | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | |
|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 26.670000 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 |
| 852844 | 2013-01-03 | 26.639999 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 33.904295 |
| 852845 | 2013-01-04 | 26.740000 | 26.653333 | NaN | NaN | NaN | NaN | NaN | 48.695375 |
| 852846 | 2013-01-07 | 26.660000 | 26.688852 | NaN | NaN | NaN | NaN | NaN | 39.919745 |
| 852847 | 2013-01-08 | 26.680000 | 26.679078 | NaN | NaN | NaN | NaN | NaN | 55.233142 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 855571 | 2023-11-02 | 67.970001 | 65.583879 | 65.934001 | 65.321001 | 64.406001 | 65.993000 | 60.257764 | 89.207420 |
| 855572 | 2023-11-03 | 68.820000 | 66.061103 | 66.398001 | 65.697001 | 64.868001 | 65.901667 | 63.726091 | 89.458580 |
| 855573 | 2023-11-06 | 68.239998 | 66.612883 | 67.160001 | 66.169001 | 65.354001 | 65.848000 | 59.885606 | 83.710782 |
| 855574 | 2023-11-07 | 68.489998 | 66.938306 | 67.612000 | 66.594001 | 65.728667 | 65.799000 | 60.977252 | 75.937617 |
| 855575 | 2023-11-08 | 69.019997 | 67.248644 | 68.067999 | 66.888000 | 66.058667 | 65.729667 | 63.259914 | 75.566164 |
2733 rows × 9 columns
# Calculate and display the correlation between 'rsi' and 'mfi'
# (pandas .corr() defaults to the Pearson correlation coefficient)
if {'rsi', 'mfi'}.issubset(df.columns):
    correlation = df[['rsi', 'mfi']].corr()
    print(correlation)
else:
    print("DataFrame does not contain 'rsi' and/or 'mfi' columns.")
rsi mfi rsi 1.000000 0.698958 mfi 0.698958 1.000000
# calculating the Moving Average Convergence Divergence (MACD) and its signal line
# Ensure the 'close' column exists in the DataFrame
if 'close' in df.columns:
    # MACD line = 12-period EMA minus 26-period EMA of the closing prices;
    # min_periods keeps the early, unreliable values as NaN
    df['macd'] = df['close'].ewm(span=12, min_periods=12).mean() - df['close'].ewm(span=26, min_periods=26).mean()
    # Calculate the 9-period EMA of the MACD values (signal line)
    df['macd_signal'] = df['macd'].ewm(span=9, min_periods=9).mean()
else:
    print("DataFrame does not contain 'close' column.")
# Check if the required columns exist in the DataFrame
if {'macd', 'macd_signal'}.issubset(df.columns):
    # Select rows where 'macd' and 'macd_signal' columns do not have missing values
    filtered_df = df[(~df['macd'].isna()) & (~df['macd_signal'].isna())]
    # Display the first few rows of these columns
    print(filtered_df[['macd', 'macd_signal']].head())
else:
    print("DataFrame does not contain 'macd' and/or 'macd_signal' columns.")
macd macd_signal 852876 -0.147786 -0.050945 852877 -0.175230 -0.078792 852878 -0.198438 -0.104970 852879 -0.235462 -0.132994 852880 -0.226841 -0.152855
# Check if the required columns exist in the DataFrame
if {'date', 'close'}.issubset(df.columns):
    # Create the prediction target: the next trading day's closing price
    # (shift(-1) leaves the last row's target as NaN)
    df['close_1d_next'] = df['close'].shift(-1)
    # Display the first few rows including 'date', 'close', and 'close_1d_next'
    print(df[['date', 'close', 'close_1d_next']].head())
else:
    print("DataFrame does not contain 'date' and/or 'close' columns.")
date close close_1d_next 852843 2013-01-02 26.670000 26.639999 852844 2013-01-03 26.639999 26.740000 852845 2013-01-04 26.740000 26.660000 852846 2013-01-07 26.660000 26.680000 852847 2013-01-08 26.680000 27.049999
def add_lagged_features(df, column_name, lags):
    """
    Adds lagged features for a specified column in the DataFrame.

    One new column per lag, named '{column_name}_{lag}d_ago', holding the
    value of the column `lag` rows earlier (earliest rows become NaN).

    Parameters:
    - df (DataFrame): The DataFrame to modify (columns are added in place).
    - column_name (str): The column name to create lagged features for.
    - lags (list of int): The list of lag periods.
    """
    for lag_days in lags:
        feature_name = f'{column_name}_{lag_days}d_ago'
        df[feature_name] = df[column_name].shift(lag_days)
def add_rolling_avg_features(df, column_name, windows):
    """
    Adds rolling average features for a specified column in the DataFrame.

    One new column per window, named '{column_name}_{window}d_avg', holding
    the trailing mean over that many rows (first window-1 rows become NaN).

    Parameters:
    - df (DataFrame): The DataFrame to modify (columns are added in place).
    - column_name (str): The column name to create rolling average features for.
    - windows (list of int): The list of rolling window sizes.
    """
    for window_size in windows:
        feature_name = f'{column_name}_{window_size}d_avg'
        df[feature_name] = df[column_name].rolling(window=window_size).mean()
# Define lag periods and rolling window sizes.
# Lags cover the previous 1/3/5 trading days plus 1, 2, 3, and 4 weeks back.
lag_periods = [1, 3, 5, 7, 14, 21, 28]
rolling_windows = [3, 5, 7, 10, 15, 30]
# Columns to create features for
columns = ['close', 'adj close', 'open', 'high', 'low', 'volume']
# Add lagged and rolling average features for each column
# (6 columns x (7 lags + 6 windows) = 78 new feature columns)
for column in columns:
    add_lagged_features(df, column, lag_periods)
    add_rolling_avg_features(df, column, rolling_windows)
# View the DataFrame
df.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | macd | macd_signal | close_1d_next | close_1d_ago | close_3d_ago | close_5d_ago | close_7d_ago | close_14d_ago | close_21d_ago | close_28d_ago | close_3d_avg | close_5d_avg | close_7d_avg | close_10d_avg | close_15d_avg | close_30d_avg | adj close_1d_ago | adj close_3d_ago | adj close_5d_ago | adj close_7d_ago | adj close_14d_ago | adj close_21d_ago | adj close_28d_ago | adj close_3d_avg | adj close_5d_avg | adj close_7d_avg | adj close_10d_avg | adj close_15d_avg | adj close_30d_avg | open_1d_ago | open_3d_ago | open_5d_ago | open_7d_ago | open_14d_ago | open_21d_ago | open_28d_ago | open_3d_avg | open_5d_avg | open_7d_avg | open_10d_avg | open_15d_avg | open_30d_avg | high_1d_ago | high_3d_ago | high_5d_ago | high_7d_ago | high_14d_ago | high_21d_ago | high_28d_ago | high_3d_avg | high_5d_avg | high_7d_avg | high_10d_avg | high_15d_avg | high_30d_avg | low_1d_ago | low_3d_ago | low_5d_ago | low_7d_ago | low_14d_ago | low_21d_ago | low_28d_ago | low_3d_avg | low_5d_avg | low_7d_avg | low_10d_avg | low_15d_avg | low_30d_avg | volume_1d_ago | volume_3d_ago | volume_5d_ago | volume_7d_ago | volume_14d_ago | volume_21d_ago | volume_28d_ago | volume_3d_avg | volume_5d_avg | volume_7d_avg | volume_10d_avg | volume_15d_avg | volume_30d_avg | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 25.840000 | 26.690001 | 25.780001 | 26.670000 | 21.445908 | 17862400.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | NaN | NaN | 26.639999 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 852844 | 2013-01-03 | 26.700001 | 26.770000 | 26.490000 | 26.639999 | 21.421791 | 9075500.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 33.904295 | NaN | NaN | 26.740000 | 26.670000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 21.445908 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 25.840000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 26.690001 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 25.780001 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 17862400.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 852845 | 2013-01-04 | 26.700001 | 26.830000 | 26.549999 | 26.740000 | 21.502203 | 7696000.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.653333 | NaN | NaN | NaN | NaN | NaN | 48.695375 | NaN | NaN | 26.660000 | 26.639999 | NaN | NaN | NaN | NaN | NaN | NaN | 26.683333 | NaN | NaN | NaN | NaN | NaN | 21.421791 | NaN | NaN | NaN | NaN | NaN | NaN | 21.456634 | NaN | NaN | NaN | NaN | NaN | 26.700001 | NaN | NaN | NaN | NaN | NaN | NaN | 26.413334 | NaN | NaN | NaN | NaN | NaN | 26.770000 | NaN | NaN | NaN | NaN | NaN | NaN | 26.763334 | NaN | NaN | NaN | NaN | NaN | 26.490000 | NaN | NaN | NaN | NaN | NaN | NaN | 26.273333 | NaN | NaN | NaN | NaN | NaN | 9075500.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.154463e+07 | NaN | NaN | NaN | NaN | NaN |
| 852846 | 2013-01-07 | 26.620001 | 26.740000 | 26.549999 | 26.660000 | 21.437866 | 7576200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.688852 | NaN | NaN | NaN | NaN | NaN | 39.919745 | NaN | NaN | 26.680000 | 26.740000 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 26.680000 | NaN | NaN | NaN | NaN | NaN | 21.502203 | 21.445908 | NaN | NaN | NaN | NaN | NaN | 21.453953 | NaN | NaN | NaN | NaN | NaN | 26.700001 | 25.840000 | NaN | NaN | NaN | NaN | NaN | 26.673334 | NaN | NaN | NaN | NaN | NaN | 26.830000 | 26.690001 | NaN | NaN | NaN | NaN | NaN | 26.780000 | NaN | NaN | NaN | NaN | NaN | 26.549999 | 25.780001 | NaN | NaN | NaN | NaN | NaN | 26.529999 | NaN | NaN | NaN | NaN | NaN | 7696000.0 | 17862400.0 | NaN | NaN | NaN | NaN | NaN | 8.115900e+06 | NaN | NaN | NaN | NaN | NaN |
| 852847 | 2013-01-08 | 26.520000 | 26.920000 | 26.459999 | 26.680000 | 21.453959 | 14360800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.679078 | NaN | NaN | NaN | NaN | NaN | 55.233142 | NaN | NaN | 27.049999 | 26.660000 | 26.639999 | NaN | NaN | NaN | NaN | NaN | 26.693333 | 26.678 | NaN | NaN | NaN | NaN | 21.437866 | 21.421791 | NaN | NaN | NaN | NaN | NaN | 21.464676 | 21.452345 | NaN | NaN | NaN | NaN | 26.620001 | 26.700001 | NaN | NaN | NaN | NaN | NaN | 26.613334 | 26.476001 | NaN | NaN | NaN | NaN | 26.740000 | 26.770000 | NaN | NaN | NaN | NaN | NaN | 26.830000 | 26.79 | NaN | NaN | NaN | NaN | 26.549999 | 26.490000 | NaN | NaN | NaN | NaN | NaN | 26.519999 | 26.366 | NaN | NaN | NaN | NaN | 7576200.0 | 9075500.0 | NaN | NaN | NaN | NaN | NaN | 9.877667e+06 | 11314180.0 | NaN | NaN | NaN | NaN |
# Check if the DataFrame contains any missing values
if df.isna().any().any():
    # Remove records with missing values and reset the index.
    # The lag/rolling features guarantee NaNs in the earliest rows, and
    # shift(-1) for the target leaves a NaN in the last row.
    df = df.dropna().reset_index(drop=True)
    print("Missing records removed. DataFrame is now cleaned.")
else:
    # Copy so downstream mutations never touch the original frame
    df = df.copy()
    print("No missing records found. DataFrame remains unchanged.")
# Display the first few rows of the cleaned DataFrame
df.head()
Missing records removed. DataFrame is now cleaned.
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | macd | macd_signal | close_1d_next | close_1d_ago | close_3d_ago | close_5d_ago | close_7d_ago | close_14d_ago | close_21d_ago | close_28d_ago | close_3d_avg | close_5d_avg | close_7d_avg | close_10d_avg | close_15d_avg | close_30d_avg | adj close_1d_ago | adj close_3d_ago | adj close_5d_ago | adj close_7d_ago | adj close_14d_ago | adj close_21d_ago | adj close_28d_ago | adj close_3d_avg | adj close_5d_avg | adj close_7d_avg | adj close_10d_avg | adj close_15d_avg | adj close_30d_avg | open_1d_ago | open_3d_ago | open_5d_ago | open_7d_ago | open_14d_ago | open_21d_ago | open_28d_ago | open_3d_avg | open_5d_avg | open_7d_avg | open_10d_avg | open_15d_avg | open_30d_avg | high_1d_ago | high_3d_ago | high_5d_ago | high_7d_ago | high_14d_ago | high_21d_ago | high_28d_ago | high_3d_avg | high_5d_avg | high_7d_avg | high_10d_avg | high_15d_avg | high_30d_avg | low_1d_ago | low_3d_ago | low_5d_ago | low_7d_ago | low_14d_ago | low_21d_ago | low_28d_ago | low_3d_avg | low_5d_avg | low_7d_avg | low_10d_avg | low_15d_avg | low_30d_avg | volume_1d_ago | volume_3d_ago | volume_5d_ago | volume_7d_ago | volume_14d_ago | volume_21d_ago | volume_28d_ago | volume_3d_avg | volume_5d_avg | volume_7d_avg | volume_10d_avg | volume_15d_avg | volume_30d_avg | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-02-20 | 27.070000 | 27.150000 | 26.950001 | 27.030001 | 21.735399 | 17057200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.307926 | 27.136 | 27.518 | 27.642000 | 27.589000 | 41.633625 | 53.176274 | -0.147786 | -0.050945 | 26.820000 | 26.959999 | 26.570000 | 27.680000 | 27.76 | 27.730000 | 28.080000 | 27.049999 | 26.903333 | 27.006 | 27.208571 | 27.426 | 27.588667 | 27.601333 | 21.679117 | 21.365499 | 22.258080 | 22.322405 | 22.298285 | 22.579723 | 21.751484 | 21.633545 | 21.716101 | 21.878994 | 22.053831 | 22.184635 | 22.194819 | 26.750000 | 26.690001 | 27.700001 | 27.799999 | 27.830000 | 27.969999 | 26.790001 | 26.886667 | 27.018 | 27.217143 | 27.386 | 27.553333 | 27.536667 | 27.190001 | 27.020000 | 27.830000 | 28.100000 | 27.980000 | 28.100000 | 27.080000 | 27.136667 | 27.248000 | 27.410000 | 27.618 | 27.779333 | 27.754667 | 26.750000 | 26.450001 | 27.270000 | 27.750000 | 27.67 | 27.820000 | 26.68 | 26.766667 | 26.842 | 27.015714 | 27.224 | 27.411333 | 27.382333 | 18297500.0 | 37728900.0 | 14931000.0 | 11159200.0 | 5800400.0 | 15906900.0 | 11671400.0 | 1.904973e+07 | 21756140.0 | 1.907480e+07 | 17005580.0 | 1.419575e+07 | 1.352419e+07 |
| 1 | 2013-02-21 | 26.990000 | 27.049999 | 26.639999 | 26.820000 | 21.566534 | 16936600.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.252312 | 27.006 | 27.426 | 27.588667 | 27.601333 | 38.257648 | 47.431888 | -0.175230 | -0.078792 | 26.770000 | 27.030001 | 26.719999 | 27.750000 | 27.75 | 27.790001 | 27.559999 | 27.309999 | 26.936666 | 26.820 | 27.075714 | 27.308 | 27.528000 | 27.606000 | 21.735399 | 21.486118 | 22.314371 | 22.314371 | 22.346525 | 22.161583 | 21.960548 | 21.660350 | 21.566534 | 21.772160 | 21.958945 | 22.135851 | 22.198572 | 27.070000 | 26.840000 | 27.740000 | 27.730000 | 27.650000 | 27.730000 | 27.129999 | 26.936666 | 26.868 | 27.111429 | 27.295 | 27.497333 | 27.552333 | 27.150000 | 27.070000 | 27.809999 | 27.799999 | 27.950001 | 28.040001 | 27.340000 | 27.130000 | 27.096000 | 27.302857 | 27.510 | 27.717333 | 27.759000 | 26.950001 | 26.600000 | 27.459999 | 27.629999 | 27.65 | 27.299999 | 27.09 | 26.780000 | 26.678 | 26.874286 | 27.108 | 27.342667 | 27.388333 | 17057200.0 | 21794500.0 | 13902600.0 | 9811900.0 | 7541300.0 | 18213200.0 | 16348500.0 | 1.743043e+07 | 22362940.0 | 2.009261e+07 | 17608410.0 | 1.493817e+07 | 1.361005e+07 |
| 2 | 2013-02-22 | 26.889999 | 27.129999 | 26.730000 | 26.770000 | 21.526327 | 16664800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.165815 | 26.820 | 27.308 | 27.528000 | 27.606000 | 37.478423 | 48.958416 | -0.198438 | -0.104970 | 26.490000 | 26.820000 | 26.959999 | 26.570000 | 27.68 | 28.219999 | 27.790001 | 27.420000 | 26.873334 | 26.860 | 26.945714 | 27.181 | 27.460000 | 27.596667 | 21.566534 | 21.679117 | 21.365499 | 22.258080 | 22.692308 | 22.346525 | 22.049007 | 21.609420 | 21.598699 | 21.667624 | 21.856822 | 22.081171 | 22.191067 | 26.990000 | 26.750000 | 26.690001 | 27.700001 | 28.000000 | 27.500000 | 27.350000 | 26.983333 | 26.908 | 26.995714 | 27.220 | 27.446667 | 27.555667 | 27.049999 | 27.190001 | 27.020000 | 27.830000 | 28.320000 | 27.889999 | 27.540001 | 27.109999 | 27.118000 | 27.202857 | 27.415 | 27.662666 | 27.760667 | 26.639999 | 26.750000 | 26.450001 | 27.270000 | 27.93 | 27.350000 | 27.25 | 26.773333 | 26.734 | 26.797143 | 27.023 | 27.281333 | 27.390000 | 16936600.0 | 18297500.0 | 37728900.0 | 14931000.0 | 9623100.0 | 15212300.0 | 10162600.0 | 1.688620e+07 | 18150120.0 | 2.034030e+07 | 17828420.0 | 1.554640e+07 | 1.377650e+07 |
| 3 | 2013-02-25 | 26.790001 | 27.080000 | 26.480000 | 26.490000 | 21.301172 | 15527100.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.086626 | 26.860 | 27.181 | 27.460000 | 27.596667 | 33.378362 | 47.675126 | -0.235462 | -0.132994 | 26.950001 | 26.770000 | 27.030001 | 26.719999 | 27.75 | 27.879999 | 27.830000 | 27.480000 | 26.693333 | 26.814 | 26.765714 | 27.054 | 27.344667 | 27.569333 | 21.526327 | 21.735399 | 21.486118 | 22.314371 | 22.418896 | 22.378695 | 22.097254 | 21.464678 | 21.561710 | 21.522881 | 21.754699 | 21.988429 | 22.169087 | 26.889999 | 27.070000 | 26.840000 | 27.740000 | 28.010000 | 27.930000 | 27.459999 | 26.890000 | 26.898 | 26.860000 | 27.119 | 27.366000 | 27.544333 | 27.129999 | 27.150000 | 27.070000 | 27.809999 | 28.150000 | 28.030001 | 27.520000 | 27.086666 | 27.120000 | 27.098571 | 27.313 | 27.580000 | 27.752000 | 26.730000 | 26.950001 | 26.600000 | 27.459999 | 27.83 | 27.639999 | 27.17 | 26.616666 | 26.710 | 26.657143 | 26.896 | 27.184667 | 27.369667 | 16664800.0 | 17057200.0 | 21794500.0 | 13902600.0 | 8954300.0 | 14444500.0 | 8688200.0 | 1.637617e+07 | 16896640.0 | 2.057237e+07 | 18265210.0 | 1.594000e+07 | 1.374912e+07 |
| 4 | 2013-02-26 | 26.530001 | 26.980000 | 26.510000 | 26.950001 | 21.671074 | 13702900.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.967270 | 26.814 | 27.054 | 27.344667 | 27.569333 | 44.181951 | 48.178912 | -0.226841 | -0.152855 | 27.570000 | 26.490000 | 26.820000 | 26.959999 | 26.57 | 27.950001 | 27.780001 | 27.709999 | 26.736667 | 26.812 | 26.820000 | 26.974 | 27.282667 | 27.553667 | 21.301172 | 21.566534 | 21.679117 | 21.365499 | 22.475189 | 22.338484 | 22.282200 | 21.499524 | 21.560101 | 21.566535 | 21.690369 | 21.938574 | 22.156490 | 26.790001 | 26.990000 | 26.750000 | 26.690001 | 27.950001 | 27.830000 | 27.580000 | 26.736667 | 26.854 | 26.837143 | 26.999 | 27.267333 | 27.517000 | 27.080000 | 27.049999 | 27.190001 | 27.020000 | 28.110001 | 27.889999 | 27.740000 | 27.063333 | 27.077999 | 27.092857 | 27.231 | 27.502000 | 27.733333 | 26.480000 | 26.639999 | 26.750000 | 26.450001 | 27.85 | 27.690001 | 27.34 | 26.573333 | 26.662 | 26.665714 | 26.784 | 27.096667 | 27.345000 | 15527100.0 | 16936600.0 | 18297500.0 | 37728900.0 | 10961400.0 | 12066800.0 | 9863200.0 | 1.529827e+07 | 15977720.0 | 1.714009e+07 | 18654310.0 | 1.625657e+07 | 1.386713e+07 |
# Split the DataFrame into training and testing sets chronologically:
# rows before 2020 train the models, 2020 onward evaluates them.
# A random split would leak future price information into training.
train_df = df[df.date.dt.year < 2020]
test_df = df[df.date.dt.year >= 2020]
print(f"Train days: {len(train_df)}, Test days: {len(test_df)}")

# Visualize the chronological split of the prediction target.
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_df.date, y=train_df.close_1d_next, name='Training'))
fig.add_trace(go.Scatter(x=test_df.date, y=test_df.close_1d_next, name='Test'))
fig.show()
Train days: 1729, Test days: 970
# Identifier / raw-price columns that must not be used as model features.
drop_cols1 = ['date', 'open', 'high', 'low', 'close', 'adj close', 'volume', 'symbol', 'security',
              'gics sector', 'gics sub-industry', 'headquarters location', 'date added', 'cik', 'founded']
# NOTE: drop(cols, 1) with a positional axis was deprecated and removed in
# pandas 2.0 -- use the explicit columns= keyword instead.
train_df = train_df.drop(columns=drop_cols1)
test_df = test_df.drop(columns=drop_cols1)
# Target column is the next day's close price.
y_train = train_df['close_1d_next'].copy()
X_train = train_df.drop(columns=['close_1d_next'])
y_test = test_df['close_1d_next'].copy()
X_test = test_df.drop(columns=['close_1d_next'])
# Standardize features; fit on training data only to avoid test-set leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train.shape, X_train_scaled.shape, X_test.shape, X_test_scaled.shape,
((1729, 87), (1729, 87), (970, 87), (970, 87))
def train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test):
    """
    Train and evaluate multiple regression models on pre-split data.

    Parameters:
    - X_train_scaled: training feature matrix (scaled with a fitted scaler).
    - X_test_scaled: test feature matrix (scaled with the SAME scaler).
    - y_train, y_test: target values (next day's close price).

    Returns:
    - A DataFrame with MSE, MAE, R2 Score and training time per model,
      sorted by R2 Score in descending order (best model first).
    """
    # Candidate regressors, all with default hyperparameters so the
    # comparison is a fair baseline sweep.
    # CatBoost's verbose=0 suppresses its per-iteration console log;
    # it does not change the fitted model or the reported metrics.
    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(),
        'Lasso Regression': Lasso(),
        'Elastic Net': ElasticNet(),
        'SVR': SVR(),
        'K-Neighbors Regressor': KNeighborsRegressor(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'AdaBoost': AdaBoostRegressor(),
        'XGBoost': XGBRegressor(),
        'CatBoost': CatBoostRegressor(verbose=0)
    }
    # Collect one metrics dict per model; DataFrame.append was removed in
    # pandas 2.0 and re-appending in a loop is quadratic, so build the
    # DataFrame once at the end instead.
    rows = []
    for model_name, model in models.items():
        start_time = time.time()
        model.fit(X_train_scaled, y_train)
        training_time = time.time() - start_time
        y_pred = model.predict(X_test_scaled)
        rows.append({
            'Model': model_name,
            'Mean Squared Error': mean_squared_error(y_test, y_pred),
            'Mean Absolute Error': mean_absolute_error(y_test, y_pred),
            'R2 Score': r2_score(y_test, y_pred),
            'Training Time (s)': training_time,
        })
    metrics_df = pd.DataFrame(rows).sort_values(by=['R2 Score'], ascending=False)
    return metrics_df
df_compare = train_and_evaluate_models(X_train,y_train,X_test,y_test)
Learning rate set to 0.044643 0: learn: 6.0603979 total: 60.8ms remaining: 1m 1: learn: 5.8169612 total: 64.5ms remaining: 32.2s 2: learn: 5.5755845 total: 68.1ms remaining: 22.6s 3: learn: 5.3462380 total: 71.7ms remaining: 17.9s 4: learn: 5.1261132 total: 75.4ms remaining: 15s 5: learn: 4.9208602 total: 79.1ms remaining: 13.1s 6: learn: 4.7250714 total: 83.1ms remaining: 11.8s 7: learn: 4.5456470 total: 86.6ms remaining: 10.7s 8: learn: 4.3606091 total: 90.2ms remaining: 9.94s 9: learn: 4.1830096 total: 93.9ms remaining: 9.29s 10: learn: 4.0166823 total: 97.7ms remaining: 8.79s 11: learn: 3.8657076 total: 102ms remaining: 8.36s 12: learn: 3.7140111 total: 105ms remaining: 8.01s 13: learn: 3.5654097 total: 109ms remaining: 7.7s 14: learn: 3.4264460 total: 113ms remaining: 7.42s 15: learn: 3.2905647 total: 117ms remaining: 7.18s 16: learn: 3.1619168 total: 121ms remaining: 6.97s 17: learn: 3.0386276 total: 124ms remaining: 6.77s 18: learn: 2.9250019 total: 128ms remaining: 6.6s 19: learn: 2.8155633 total: 132ms remaining: 6.46s 20: learn: 2.7067078 total: 136ms remaining: 6.32s 21: learn: 2.6104041 total: 139ms remaining: 6.19s 22: learn: 2.5171443 total: 143ms remaining: 6.07s 23: learn: 2.4234694 total: 147ms remaining: 5.96s 24: learn: 2.3336920 total: 150ms remaining: 5.86s 25: learn: 2.2501270 total: 154ms remaining: 5.77s 26: learn: 2.1685983 total: 158ms remaining: 5.68s 27: learn: 2.0914990 total: 161ms remaining: 5.6s 28: learn: 2.0188179 total: 165ms remaining: 5.53s 29: learn: 1.9490958 total: 169ms remaining: 5.45s 30: learn: 1.8819748 total: 172ms remaining: 5.38s 31: learn: 1.8183164 total: 176ms remaining: 5.32s 32: learn: 1.7609789 total: 180ms remaining: 5.26s 33: learn: 1.7008286 total: 183ms remaining: 5.2s 34: learn: 1.6438044 total: 186ms remaining: 5.13s 35: learn: 1.5954289 total: 190ms remaining: 5.08s 36: learn: 1.5458456 total: 193ms remaining: 5.02s 37: learn: 1.4999957 total: 197ms remaining: 4.97s 38: learn: 1.4557749 total: 200ms 
remaining: 4.93s 39: learn: 1.4136122 total: 203ms remaining: 4.88s 40: learn: 1.3740245 total: 207ms remaining: 4.84s 41: learn: 1.3360847 total: 210ms remaining: 4.79s 42: learn: 1.3006229 total: 213ms remaining: 4.75s 43: learn: 1.2683312 total: 217ms remaining: 4.71s 44: learn: 1.2328651 total: 220ms remaining: 4.67s 45: learn: 1.2022101 total: 223ms remaining: 4.63s 46: learn: 1.1716981 total: 227ms remaining: 4.6s 47: learn: 1.1425806 total: 230ms remaining: 4.56s 48: learn: 1.1126613 total: 234ms remaining: 4.53s 49: learn: 1.0845503 total: 237ms remaining: 4.5s 50: learn: 1.0590177 total: 240ms remaining: 4.47s 51: learn: 1.0347707 total: 244ms remaining: 4.44s 52: learn: 1.0113231 total: 247ms remaining: 4.42s 53: learn: 0.9886945 total: 251ms remaining: 4.39s 54: learn: 0.9669580 total: 254ms remaining: 4.36s 55: learn: 0.9468328 total: 257ms remaining: 4.34s 56: learn: 0.9270737 total: 261ms remaining: 4.31s 57: learn: 0.9100334 total: 264ms remaining: 4.29s 58: learn: 0.8917612 total: 268ms remaining: 4.27s 59: learn: 0.8750815 total: 271ms remaining: 4.24s 60: learn: 0.8601060 total: 274ms remaining: 4.22s 61: learn: 0.8462298 total: 277ms remaining: 4.2s 62: learn: 0.8345633 total: 281ms remaining: 4.18s 63: learn: 0.8216135 total: 284ms remaining: 4.16s 64: learn: 0.8083116 total: 288ms remaining: 4.14s 65: learn: 0.7956239 total: 292ms remaining: 4.13s 66: learn: 0.7843834 total: 295ms remaining: 4.11s 67: learn: 0.7740919 total: 298ms remaining: 4.09s 68: learn: 0.7643108 total: 301ms remaining: 4.07s 69: learn: 0.7561664 total: 305ms remaining: 4.05s 70: learn: 0.7491327 total: 308ms remaining: 4.03s 71: learn: 0.7411352 total: 311ms remaining: 4.01s 72: learn: 0.7330505 total: 315ms remaining: 4s 73: learn: 0.7269962 total: 318ms remaining: 3.98s 74: learn: 0.7192950 total: 321ms remaining: 3.96s 75: learn: 0.7126213 total: 324ms remaining: 3.94s 76: learn: 0.7067352 total: 328ms remaining: 3.93s 77: learn: 0.7006922 total: 331ms remaining: 3.91s 
78: learn: 0.6944183 total: 335ms remaining: 3.9s 79: learn: 0.6885217 total: 338ms remaining: 3.89s 80: learn: 0.6825927 total: 341ms remaining: 3.87s 81: learn: 0.6783627 total: 345ms remaining: 3.86s 82: learn: 0.6741644 total: 348ms remaining: 3.84s 83: learn: 0.6696567 total: 351ms remaining: 3.83s 84: learn: 0.6646154 total: 355ms remaining: 3.82s 85: learn: 0.6599463 total: 358ms remaining: 3.81s 86: learn: 0.6560602 total: 362ms remaining: 3.8s 87: learn: 0.6536929 total: 365ms remaining: 3.78s 88: learn: 0.6496752 total: 369ms remaining: 3.77s 89: learn: 0.6462499 total: 372ms remaining: 3.76s 90: learn: 0.6428735 total: 375ms remaining: 3.75s 91: learn: 0.6397495 total: 379ms remaining: 3.74s 92: learn: 0.6367543 total: 382ms remaining: 3.72s 93: learn: 0.6337726 total: 385ms remaining: 3.71s 94: learn: 0.6308954 total: 389ms remaining: 3.7s 95: learn: 0.6280612 total: 392ms remaining: 3.69s 96: learn: 0.6250764 total: 396ms remaining: 3.68s 97: learn: 0.6223572 total: 399ms remaining: 3.67s 98: learn: 0.6205398 total: 402ms remaining: 3.66s 99: learn: 0.6186628 total: 406ms remaining: 3.65s 100: learn: 0.6154184 total: 409ms remaining: 3.64s 101: learn: 0.6124974 total: 412ms remaining: 3.63s 102: learn: 0.6095369 total: 416ms remaining: 3.62s 103: learn: 0.6081161 total: 419ms remaining: 3.61s 104: learn: 0.6061648 total: 422ms remaining: 3.6s 105: learn: 0.6037361 total: 426ms remaining: 3.59s 106: learn: 0.6018697 total: 429ms remaining: 3.58s 107: learn: 0.5996563 total: 433ms remaining: 3.57s 108: learn: 0.5987782 total: 436ms remaining: 3.56s 109: learn: 0.5962953 total: 439ms remaining: 3.55s 110: learn: 0.5944525 total: 443ms remaining: 3.55s 111: learn: 0.5925792 total: 446ms remaining: 3.54s 112: learn: 0.5908725 total: 450ms remaining: 3.53s 113: learn: 0.5892615 total: 453ms remaining: 3.52s 114: learn: 0.5876082 total: 456ms remaining: 3.51s 115: learn: 0.5856574 total: 460ms remaining: 3.51s 116: learn: 0.5839197 total: 464ms remaining: 
3.5s 117: learn: 0.5821181 total: 467ms remaining: 3.49s 118: learn: 0.5802687 total: 471ms remaining: 3.48s 119: learn: 0.5783092 total: 474ms remaining: 3.48s 120: learn: 0.5768204 total: 477ms remaining: 3.47s 121: learn: 0.5753784 total: 481ms remaining: 3.46s 122: learn: 0.5739414 total: 484ms remaining: 3.45s 123: learn: 0.5731172 total: 487ms remaining: 3.44s 124: learn: 0.5717661 total: 491ms remaining: 3.44s 125: learn: 0.5699490 total: 494ms remaining: 3.43s 126: learn: 0.5692480 total: 497ms remaining: 3.42s 127: learn: 0.5683714 total: 501ms remaining: 3.41s 128: learn: 0.5669671 total: 504ms remaining: 3.4s 129: learn: 0.5661305 total: 507ms remaining: 3.4s 130: learn: 0.5648214 total: 511ms remaining: 3.39s 131: learn: 0.5632118 total: 514ms remaining: 3.38s 132: learn: 0.5621830 total: 518ms remaining: 3.37s 133: learn: 0.5614073 total: 521ms remaining: 3.37s 134: learn: 0.5607392 total: 524ms remaining: 3.36s 135: learn: 0.5592399 total: 528ms remaining: 3.35s 136: learn: 0.5582825 total: 532ms remaining: 3.35s 137: learn: 0.5570203 total: 535ms remaining: 3.34s 138: learn: 0.5555467 total: 539ms remaining: 3.33s 139: learn: 0.5544866 total: 542ms remaining: 3.33s 140: learn: 0.5534618 total: 546ms remaining: 3.32s 141: learn: 0.5527745 total: 549ms remaining: 3.32s 142: learn: 0.5517276 total: 553ms remaining: 3.31s 143: learn: 0.5511754 total: 556ms remaining: 3.31s 144: learn: 0.5499848 total: 560ms remaining: 3.3s 145: learn: 0.5492370 total: 563ms remaining: 3.29s 146: learn: 0.5480831 total: 566ms remaining: 3.29s 147: learn: 0.5470207 total: 570ms remaining: 3.28s 148: learn: 0.5461768 total: 574ms remaining: 3.27s 149: learn: 0.5452187 total: 577ms remaining: 3.27s 150: learn: 0.5442155 total: 580ms remaining: 3.26s 151: learn: 0.5431025 total: 584ms remaining: 3.26s 152: learn: 0.5423642 total: 587ms remaining: 3.25s 153: learn: 0.5414228 total: 591ms remaining: 3.25s 154: learn: 0.5402482 total: 594ms remaining: 3.24s 155: learn: 0.5390607 
total: 598ms remaining: 3.23s 156: learn: 0.5383473 total: 602ms remaining: 3.23s 157: learn: 0.5373131 total: 606ms remaining: 3.23s 158: learn: 0.5364392 total: 609ms remaining: 3.22s 159: learn: 0.5357421 total: 614ms remaining: 3.22s 160: learn: 0.5343277 total: 617ms remaining: 3.22s 161: learn: 0.5337785 total: 621ms remaining: 3.21s 162: learn: 0.5329012 total: 624ms remaining: 3.21s 163: learn: 0.5317662 total: 628ms remaining: 3.2s 164: learn: 0.5305788 total: 632ms remaining: 3.2s 165: learn: 0.5291670 total: 635ms remaining: 3.19s 166: learn: 0.5279711 total: 639ms remaining: 3.19s 167: learn: 0.5263201 total: 643ms remaining: 3.18s 168: learn: 0.5250413 total: 646ms remaining: 3.18s 169: learn: 0.5239617 total: 649ms remaining: 3.17s 170: learn: 0.5232086 total: 653ms remaining: 3.16s 171: learn: 0.5223393 total: 656ms remaining: 3.16s 172: learn: 0.5213785 total: 659ms remaining: 3.15s 173: learn: 0.5205693 total: 663ms remaining: 3.15s 174: learn: 0.5195080 total: 666ms remaining: 3.14s 175: learn: 0.5184230 total: 670ms remaining: 3.13s 176: learn: 0.5179592 total: 673ms remaining: 3.13s 177: learn: 0.5170185 total: 676ms remaining: 3.12s 178: learn: 0.5158612 total: 680ms remaining: 3.12s 179: learn: 0.5145391 total: 683ms remaining: 3.11s 180: learn: 0.5134332 total: 686ms remaining: 3.11s 181: learn: 0.5123862 total: 690ms remaining: 3.1s 182: learn: 0.5109381 total: 694ms remaining: 3.1s 183: learn: 0.5094690 total: 697ms remaining: 3.09s 184: learn: 0.5087636 total: 701ms remaining: 3.09s 185: learn: 0.5082193 total: 704ms remaining: 3.08s 186: learn: 0.5077741 total: 707ms remaining: 3.07s 187: learn: 0.5064784 total: 711ms remaining: 3.07s 188: learn: 0.5053126 total: 714ms remaining: 3.06s 189: learn: 0.5045861 total: 718ms remaining: 3.06s 190: learn: 0.5035784 total: 721ms remaining: 3.05s 191: learn: 0.5025377 total: 724ms remaining: 3.05s 192: learn: 0.5017748 total: 728ms remaining: 3.04s 193: learn: 0.5008812 total: 732ms remaining: 
3.04s 194: learn: 0.5004256 total: 735ms remaining: 3.03s 195: learn: 0.4994655 total: 738ms remaining: 3.03s 196: learn: 0.4989930 total: 742ms remaining: 3.02s 197: learn: 0.4980951 total: 745ms remaining: 3.02s 198: learn: 0.4973099 total: 748ms remaining: 3.01s 199: learn: 0.4961640 total: 752ms remaining: 3.01s 200: learn: 0.4946290 total: 755ms remaining: 3s 201: learn: 0.4940315 total: 758ms remaining: 3s 202: learn: 0.4931395 total: 762ms remaining: 2.99s 203: learn: 0.4920884 total: 765ms remaining: 2.98s 204: learn: 0.4909474 total: 769ms remaining: 2.98s 205: learn: 0.4899796 total: 773ms remaining: 2.98s 206: learn: 0.4890645 total: 776ms remaining: 2.97s 207: learn: 0.4882374 total: 779ms remaining: 2.97s 208: learn: 0.4871172 total: 783ms remaining: 2.96s 209: learn: 0.4861811 total: 786ms remaining: 2.96s 210: learn: 0.4850126 total: 790ms remaining: 2.95s 211: learn: 0.4837164 total: 793ms remaining: 2.95s 212: learn: 0.4829656 total: 797ms remaining: 2.94s 213: learn: 0.4819662 total: 800ms remaining: 2.94s 214: learn: 0.4817394 total: 803ms remaining: 2.93s 215: learn: 0.4811177 total: 807ms remaining: 2.93s 216: learn: 0.4806224 total: 810ms remaining: 2.92s 217: learn: 0.4800808 total: 813ms remaining: 2.92s 218: learn: 0.4789126 total: 817ms remaining: 2.91s 219: learn: 0.4784037 total: 820ms remaining: 2.91s 220: learn: 0.4774369 total: 823ms remaining: 2.9s 221: learn: 0.4764525 total: 827ms remaining: 2.9s 222: learn: 0.4757860 total: 830ms remaining: 2.89s 223: learn: 0.4748362 total: 833ms remaining: 2.89s 224: learn: 0.4738011 total: 837ms remaining: 2.88s 225: learn: 0.4732533 total: 840ms remaining: 2.88s 226: learn: 0.4724302 total: 844ms remaining: 2.87s 227: learn: 0.4714185 total: 847ms remaining: 2.87s 228: learn: 0.4704901 total: 850ms remaining: 2.86s 229: learn: 0.4696393 total: 854ms remaining: 2.86s 230: learn: 0.4688642 total: 857ms remaining: 2.85s 231: learn: 0.4684056 total: 861ms remaining: 2.85s 232: learn: 0.4680263 
total: 864ms remaining: 2.84s 233: learn: 0.4673195 total: 867ms remaining: 2.84s 234: learn: 0.4661429 total: 871ms remaining: 2.83s 235: learn: 0.4651517 total: 874ms remaining: 2.83s 236: learn: 0.4643784 total: 877ms remaining: 2.82s 237: learn: 0.4639422 total: 881ms remaining: 2.82s 238: learn: 0.4629527 total: 884ms remaining: 2.81s 239: learn: 0.4621494 total: 888ms remaining: 2.81s 240: learn: 0.4611603 total: 891ms remaining: 2.81s 241: learn: 0.4603838 total: 894ms remaining: 2.8s 242: learn: 0.4593452 total: 898ms remaining: 2.8s 243: learn: 0.4586434 total: 901ms remaining: 2.79s 244: learn: 0.4577788 total: 905ms remaining: 2.79s 245: learn: 0.4576161 total: 908ms remaining: 2.78s 246: learn: 0.4570526 total: 911ms remaining: 2.78s 247: learn: 0.4560632 total: 915ms remaining: 2.77s 248: learn: 0.4552049 total: 918ms remaining: 2.77s 249: learn: 0.4544847 total: 922ms remaining: 2.77s 250: learn: 0.4536628 total: 925ms remaining: 2.76s 251: learn: 0.4529050 total: 928ms remaining: 2.75s 252: learn: 0.4521824 total: 932ms remaining: 2.75s 253: learn: 0.4515072 total: 935ms remaining: 2.75s 254: learn: 0.4506698 total: 939ms remaining: 2.74s 255: learn: 0.4501205 total: 942ms remaining: 2.74s 256: learn: 0.4493848 total: 945ms remaining: 2.73s 257: learn: 0.4486159 total: 949ms remaining: 2.73s 258: learn: 0.4482442 total: 953ms remaining: 2.73s 259: learn: 0.4478827 total: 956ms remaining: 2.72s 260: learn: 0.4473978 total: 959ms remaining: 2.71s 261: learn: 0.4468994 total: 962ms remaining: 2.71s 262: learn: 0.4456701 total: 966ms remaining: 2.71s 263: learn: 0.4449110 total: 969ms remaining: 2.7s 264: learn: 0.4442542 total: 973ms remaining: 2.7s 265: learn: 0.4431941 total: 976ms remaining: 2.69s 266: learn: 0.4425103 total: 980ms remaining: 2.69s 267: learn: 0.4416661 total: 984ms remaining: 2.69s 268: learn: 0.4411630 total: 987ms remaining: 2.68s 269: learn: 0.4404441 total: 991ms remaining: 2.68s 270: learn: 0.4400363 total: 994ms remaining: 
2.67s 271: learn: 0.4398475 total: 998ms remaining: 2.67s 272: learn: 0.4390363 total: 1s remaining: 2.67s 273: learn: 0.4381161 total: 1s remaining: 2.66s 274: learn: 0.4374399 total: 1.01s remaining: 2.66s 275: learn: 0.4370343 total: 1.01s remaining: 2.65s 276: learn: 0.4366440 total: 1.01s remaining: 2.65s 277: learn: 0.4363497 total: 1.02s remaining: 2.64s 278: learn: 0.4360749 total: 1.02s remaining: 2.64s 279: learn: 0.4353030 total: 1.02s remaining: 2.63s 280: learn: 0.4346082 total: 1.03s remaining: 2.63s 281: learn: 0.4342305 total: 1.03s remaining: 2.63s 282: learn: 0.4338120 total: 1.03s remaining: 2.62s 283: learn: 0.4336984 total: 1.04s remaining: 2.62s 284: learn: 0.4329844 total: 1.04s remaining: 2.61s 285: learn: 0.4325585 total: 1.04s remaining: 2.61s 286: learn: 0.4316236 total: 1.05s remaining: 2.6s 287: learn: 0.4312618 total: 1.05s remaining: 2.6s 288: learn: 0.4305788 total: 1.05s remaining: 2.6s 289: learn: 0.4299323 total: 1.06s remaining: 2.59s 290: learn: 0.4294147 total: 1.06s remaining: 2.59s 291: learn: 0.4287311 total: 1.06s remaining: 2.58s 292: learn: 0.4281021 total: 1.07s remaining: 2.58s 293: learn: 0.4274824 total: 1.07s remaining: 2.58s 294: learn: 0.4266494 total: 1.08s remaining: 2.57s 295: learn: 0.4259133 total: 1.08s remaining: 2.57s 296: learn: 0.4254208 total: 1.08s remaining: 2.56s 297: learn: 0.4248581 total: 1.09s remaining: 2.56s 298: learn: 0.4244197 total: 1.09s remaining: 2.55s 299: learn: 0.4238834 total: 1.09s remaining: 2.55s 300: learn: 0.4235753 total: 1.1s remaining: 2.55s 301: learn: 0.4230278 total: 1.1s remaining: 2.54s 302: learn: 0.4223168 total: 1.1s remaining: 2.54s 303: learn: 0.4218202 total: 1.11s remaining: 2.53s 304: learn: 0.4213324 total: 1.11s remaining: 2.53s 305: learn: 0.4209713 total: 1.11s remaining: 2.52s 306: learn: 0.4206682 total: 1.12s remaining: 2.52s 307: learn: 0.4205730 total: 1.12s remaining: 2.52s 308: learn: 0.4200366 total: 1.12s remaining: 2.51s 309: learn: 0.4196622 total: 
1.13s remaining: 2.51s 310: learn: 0.4188254 total: 1.13s remaining: 2.5s 311: learn: 0.4182222 total: 1.13s remaining: 2.5s 312: learn: 0.4181272 total: 1.14s remaining: 2.49s 313: learn: 0.4178863 total: 1.14s remaining: 2.49s 314: learn: 0.4171997 total: 1.14s remaining: 2.48s 315: learn: 0.4165719 total: 1.15s remaining: 2.48s 316: learn: 0.4158275 total: 1.15s remaining: 2.48s 317: learn: 0.4150223 total: 1.15s remaining: 2.47s 318: learn: 0.4144227 total: 1.16s remaining: 2.47s 319: learn: 0.4138124 total: 1.16s remaining: 2.46s 320: learn: 0.4135332 total: 1.16s remaining: 2.46s 321: learn: 0.4129018 total: 1.17s remaining: 2.46s 322: learn: 0.4123695 total: 1.17s remaining: 2.45s 323: learn: 0.4116624 total: 1.17s remaining: 2.45s 324: learn: 0.4108634 total: 1.18s remaining: 2.44s 325: learn: 0.4103299 total: 1.18s remaining: 2.44s 326: learn: 0.4096449 total: 1.18s remaining: 2.44s 327: learn: 0.4092467 total: 1.19s remaining: 2.43s 328: learn: 0.4090363 total: 1.19s remaining: 2.43s 329: learn: 0.4080857 total: 1.2s remaining: 2.43s 330: learn: 0.4077160 total: 1.2s remaining: 2.42s 331: learn: 0.4071226 total: 1.2s remaining: 2.42s 332: learn: 0.4067749 total: 1.21s remaining: 2.42s 333: learn: 0.4062413 total: 1.21s remaining: 2.41s 334: learn: 0.4055695 total: 1.21s remaining: 2.41s 335: learn: 0.4054942 total: 1.22s remaining: 2.4s 336: learn: 0.4047835 total: 1.22s remaining: 2.4s 337: learn: 0.4042196 total: 1.22s remaining: 2.4s 338: learn: 0.4038735 total: 1.23s remaining: 2.39s 339: learn: 0.4036386 total: 1.23s remaining: 2.39s 340: learn: 0.4033409 total: 1.24s remaining: 2.39s 341: learn: 0.4026643 total: 1.24s remaining: 2.38s 342: learn: 0.4023055 total: 1.24s remaining: 2.38s 343: learn: 0.4021658 total: 1.25s remaining: 2.38s 344: learn: 0.4018412 total: 1.25s remaining: 2.37s 345: learn: 0.4013429 total: 1.25s remaining: 2.37s 346: learn: 0.4008246 total: 1.26s remaining: 2.37s 347: learn: 0.4002585 total: 1.26s remaining: 2.36s 348: 
learn: 0.3998110 total: 1.26s remaining: 2.36s 349: learn: 0.3990401 total: 1.27s remaining: 2.35s 350: learn: 0.3982868 total: 1.27s remaining: 2.35s 351: learn: 0.3977941 total: 1.27s remaining: 2.35s 352: learn: 0.3971540 total: 1.28s remaining: 2.34s 353: learn: 0.3966626 total: 1.28s remaining: 2.34s 354: learn: 0.3965989 total: 1.28s remaining: 2.33s 355: learn: 0.3962189 total: 1.29s remaining: 2.33s 356: learn: 0.3958068 total: 1.29s remaining: 2.33s 357: learn: 0.3953095 total: 1.29s remaining: 2.32s 358: learn: 0.3948418 total: 1.3s remaining: 2.32s 359: learn: 0.3946123 total: 1.3s remaining: 2.31s 360: learn: 0.3938100 total: 1.3s remaining: 2.31s 361: learn: 0.3932202 total: 1.31s remaining: 2.31s 362: learn: 0.3925485 total: 1.31s remaining: 2.3s 363: learn: 0.3918346 total: 1.31s remaining: 2.3s 364: learn: 0.3915180 total: 1.32s remaining: 2.29s 365: learn: 0.3907133 total: 1.32s remaining: 2.29s 366: learn: 0.3905787 total: 1.32s remaining: 2.29s 367: learn: 0.3900100 total: 1.33s remaining: 2.28s 368: learn: 0.3894581 total: 1.33s remaining: 2.28s 369: learn: 0.3887745 total: 1.33s remaining: 2.27s 370: learn: 0.3881815 total: 1.34s remaining: 2.27s 371: learn: 0.3877839 total: 1.34s remaining: 2.27s 372: learn: 0.3872649 total: 1.34s remaining: 2.26s 373: learn: 0.3864811 total: 1.35s remaining: 2.26s 374: learn: 0.3859586 total: 1.35s remaining: 2.25s 375: learn: 0.3857552 total: 1.35s remaining: 2.25s 376: learn: 0.3851249 total: 1.36s remaining: 2.25s 377: learn: 0.3846241 total: 1.36s remaining: 2.24s 378: learn: 0.3842221 total: 1.36s remaining: 2.24s 379: learn: 0.3840501 total: 1.37s remaining: 2.23s 380: learn: 0.3839565 total: 1.37s remaining: 2.23s 381: learn: 0.3834811 total: 1.38s remaining: 2.23s 382: learn: 0.3829302 total: 1.38s remaining: 2.22s 383: learn: 0.3824233 total: 1.38s remaining: 2.22s 384: learn: 0.3820862 total: 1.39s remaining: 2.21s 385: learn: 0.3817059 total: 1.39s remaining: 2.21s 386: learn: 0.3810940 total: 
1.39s remaining: 2.21s 387: learn: 0.3807112 total: 1.4s remaining: 2.2s 388: learn: 0.3802605 total: 1.4s remaining: 2.2s 389: learn: 0.3798039 total: 1.4s remaining: 2.19s 390: learn: 0.3793238 total: 1.41s remaining: 2.19s 391: learn: 0.3787842 total: 1.41s remaining: 2.19s 392: learn: 0.3783469 total: 1.41s remaining: 2.18s 393: learn: 0.3774757 total: 1.42s remaining: 2.18s 394: learn: 0.3770911 total: 1.42s remaining: 2.18s 395: learn: 0.3767255 total: 1.42s remaining: 2.17s 396: learn: 0.3761264 total: 1.43s remaining: 2.17s 397: learn: 0.3757592 total: 1.43s remaining: 2.16s 398: learn: 0.3751453 total: 1.43s remaining: 2.16s 399: learn: 0.3748033 total: 1.44s remaining: 2.16s 400: learn: 0.3743184 total: 1.44s remaining: 2.15s 401: learn: 0.3740077 total: 1.44s remaining: 2.15s 402: learn: 0.3733825 total: 1.45s remaining: 2.14s 403: learn: 0.3728581 total: 1.45s remaining: 2.14s 404: learn: 0.3722381 total: 1.45s remaining: 2.14s 405: learn: 0.3716600 total: 1.46s remaining: 2.13s 406: learn: 0.3709735 total: 1.46s remaining: 2.13s 407: learn: 0.3706292 total: 1.46s remaining: 2.12s 408: learn: 0.3700405 total: 1.47s remaining: 2.12s 409: learn: 0.3695724 total: 1.47s remaining: 2.12s 410: learn: 0.3688604 total: 1.47s remaining: 2.11s 411: learn: 0.3687772 total: 1.48s remaining: 2.11s 412: learn: 0.3680599 total: 1.48s remaining: 2.1s 413: learn: 0.3676867 total: 1.48s remaining: 2.1s 414: learn: 0.3673532 total: 1.49s remaining: 2.1s 415: learn: 0.3667493 total: 1.49s remaining: 2.09s 416: learn: 0.3663581 total: 1.5s remaining: 2.09s 417: learn: 0.3659273 total: 1.5s remaining: 2.09s 418: learn: 0.3653585 total: 1.5s remaining: 2.08s 419: learn: 0.3647104 total: 1.5s remaining: 2.08s 420: learn: 0.3643467 total: 1.51s remaining: 2.07s 421: learn: 0.3638706 total: 1.51s remaining: 2.07s 422: learn: 0.3637044 total: 1.51s remaining: 2.07s 423: learn: 0.3630532 total: 1.52s remaining: 2.06s 424: learn: 0.3628976 total: 1.52s remaining: 2.06s 425: learn: 
0.3622257 total: 1.52s remaining: 2.06s 426: learn: 0.3618914 total: 1.53s remaining: 2.05s 427: learn: 0.3614434 total: 1.53s remaining: 2.05s 428: learn: 0.3610056 total: 1.53s remaining: 2.04s 429: learn: 0.3603069 total: 1.54s remaining: 2.04s 430: learn: 0.3599433 total: 1.54s remaining: 2.04s 431: learn: 0.3598531 total: 1.54s remaining: 2.03s 432: learn: 0.3597998 total: 1.55s remaining: 2.03s 433: learn: 0.3592574 total: 1.55s remaining: 2.02s 434: learn: 0.3586937 total: 1.55s remaining: 2.02s 435: learn: 0.3582985 total: 1.56s remaining: 2.02s 436: learn: 0.3579804 total: 1.56s remaining: 2.01s 437: learn: 0.3576664 total: 1.56s remaining: 2.01s 438: learn: 0.3572953 total: 1.57s remaining: 2s 439: learn: 0.3568969 total: 1.57s remaining: 2s 440: learn: 0.3562306 total: 1.58s remaining: 2s 441: learn: 0.3555624 total: 1.58s remaining: 1.99s 442: learn: 0.3550610 total: 1.58s remaining: 1.99s 443: learn: 0.3545855 total: 1.59s remaining: 1.99s 444: learn: 0.3541245 total: 1.59s remaining: 1.98s 445: learn: 0.3537684 total: 1.59s remaining: 1.98s 446: learn: 0.3534726 total: 1.6s remaining: 1.97s 447: learn: 0.3533520 total: 1.6s remaining: 1.97s 448: learn: 0.3530596 total: 1.6s remaining: 1.97s 449: learn: 0.3525138 total: 1.61s remaining: 1.96s 450: learn: 0.3520520 total: 1.61s remaining: 1.96s 451: learn: 0.3517294 total: 1.61s remaining: 1.96s 452: learn: 0.3516567 total: 1.62s remaining: 1.95s 453: learn: 0.3513043 total: 1.62s remaining: 1.95s 454: learn: 0.3509406 total: 1.62s remaining: 1.94s 455: learn: 0.3504919 total: 1.63s remaining: 1.94s 456: learn: 0.3498914 total: 1.63s remaining: 1.94s 457: learn: 0.3494417 total: 1.63s remaining: 1.93s 458: learn: 0.3490944 total: 1.64s remaining: 1.93s 459: learn: 0.3487102 total: 1.64s remaining: 1.92s 460: learn: 0.3484557 total: 1.64s remaining: 1.92s 461: learn: 0.3478789 total: 1.65s remaining: 1.92s 462: learn: 0.3474275 total: 1.65s remaining: 1.91s 463: learn: 0.3468342 total: 1.65s remaining: 
1.91s 464: learn: 0.3463040 total: 1.66s remaining: 1.91s 465: learn: 0.3458213 total: 1.66s remaining: 1.9s 466: learn: 0.3451803 total: 1.66s remaining: 1.9s 467: learn: 0.3447292 total: 1.67s remaining: 1.89s 468: learn: 0.3443530 total: 1.67s remaining: 1.89s 469: learn: 0.3437850 total: 1.67s remaining: 1.89s 470: learn: 0.3433565 total: 1.68s remaining: 1.88s 471: learn: 0.3430048 total: 1.68s remaining: 1.88s 472: learn: 0.3425770 total: 1.68s remaining: 1.88s 473: learn: 0.3419536 total: 1.69s remaining: 1.87s 474: learn: 0.3414171 total: 1.69s remaining: 1.87s 475: learn: 0.3409048 total: 1.69s remaining: 1.86s 476: learn: 0.3406389 total: 1.7s remaining: 1.86s 477: learn: 0.3402231 total: 1.7s remaining: 1.86s 478: learn: 0.3397786 total: 1.7s remaining: 1.85s 479: learn: 0.3396398 total: 1.71s remaining: 1.85s 480: learn: 0.3395825 total: 1.71s remaining: 1.85s 481: learn: 0.3391371 total: 1.71s remaining: 1.84s 482: learn: 0.3386234 total: 1.72s remaining: 1.84s 483: learn: 0.3382257 total: 1.72s remaining: 1.83s 484: learn: 0.3378495 total: 1.72s remaining: 1.83s 485: learn: 0.3375195 total: 1.73s remaining: 1.83s 486: learn: 0.3371611 total: 1.73s remaining: 1.82s 487: learn: 0.3367422 total: 1.73s remaining: 1.82s 488: learn: 0.3363054 total: 1.74s remaining: 1.82s 489: learn: 0.3359786 total: 1.74s remaining: 1.81s 490: learn: 0.3356998 total: 1.75s remaining: 1.81s 491: learn: 0.3354453 total: 1.75s remaining: 1.8s 492: learn: 0.3352187 total: 1.75s remaining: 1.8s 493: learn: 0.3350124 total: 1.75s remaining: 1.8s 494: learn: 0.3346929 total: 1.76s remaining: 1.79s 495: learn: 0.3342032 total: 1.76s remaining: 1.79s 496: learn: 0.3336972 total: 1.77s remaining: 1.79s 497: learn: 0.3336357 total: 1.77s remaining: 1.78s 498: learn: 0.3333691 total: 1.77s remaining: 1.78s 499: learn: 0.3329063 total: 1.78s remaining: 1.78s 500: learn: 0.3326459 total: 1.78s remaining: 1.77s 501: learn: 0.3323511 total: 1.78s remaining: 1.77s 502: learn: 0.3319477 
total: 1.79s remaining: 1.76s 503: learn: 0.3317364 total: 1.79s remaining: 1.76s 504: learn: 0.3312039 total: 1.79s remaining: 1.76s 505: learn: 0.3305307 total: 1.8s remaining: 1.75s 506: learn: 0.3300698 total: 1.8s remaining: 1.75s 507: learn: 0.3296915 total: 1.8s remaining: 1.75s 508: learn: 0.3294125 total: 1.81s remaining: 1.74s 509: learn: 0.3291011 total: 1.81s remaining: 1.74s 510: learn: 0.3289021 total: 1.81s remaining: 1.74s 511: learn: 0.3285137 total: 1.82s remaining: 1.73s 512: learn: 0.3278964 total: 1.82s remaining: 1.73s 513: learn: 0.3274555 total: 1.82s remaining: 1.72s 514: learn: 0.3270691 total: 1.83s remaining: 1.72s 515: learn: 0.3268052 total: 1.83s remaining: 1.72s 516: learn: 0.3263295 total: 1.83s remaining: 1.71s 517: learn: 0.3258524 total: 1.84s remaining: 1.71s 518: learn: 0.3255794 total: 1.84s remaining: 1.71s 519: learn: 0.3251552 total: 1.84s remaining: 1.7s 520: learn: 0.3247569 total: 1.85s remaining: 1.7s 521: learn: 0.3242420 total: 1.85s remaining: 1.7s 522: learn: 0.3240746 total: 1.85s remaining: 1.69s 523: learn: 0.3239854 total: 1.86s remaining: 1.69s 524: learn: 0.3239415 total: 1.86s remaining: 1.68s 525: learn: 0.3236346 total: 1.86s remaining: 1.68s 526: learn: 0.3233439 total: 1.87s remaining: 1.68s 527: learn: 0.3232261 total: 1.87s remaining: 1.67s 528: learn: 0.3226489 total: 1.87s remaining: 1.67s 529: learn: 0.3221208 total: 1.88s remaining: 1.67s 530: learn: 0.3217815 total: 1.88s remaining: 1.66s 531: learn: 0.3216486 total: 1.88s remaining: 1.66s 532: learn: 0.3213358 total: 1.89s remaining: 1.65s 533: learn: 0.3211304 total: 1.89s remaining: 1.65s 534: learn: 0.3210996 total: 1.89s remaining: 1.65s 535: learn: 0.3207515 total: 1.9s remaining: 1.64s 536: learn: 0.3204738 total: 1.9s remaining: 1.64s 537: learn: 0.3198882 total: 1.9s remaining: 1.64s 538: learn: 0.3195154 total: 1.91s remaining: 1.63s 539: learn: 0.3192952 total: 1.91s remaining: 1.63s 540: learn: 0.3190280 total: 1.92s remaining: 1.62s 
541: learn: 0.3187129 total: 1.92s remaining: 1.62s 542: learn: 0.3181960 total: 1.92s remaining: 1.62s 543: learn: 0.3176606 total: 1.93s remaining: 1.61s 544: learn: 0.3170949 total: 1.93s remaining: 1.61s 545: learn: 0.3167459 total: 1.93s remaining: 1.61s 546: learn: 0.3163222 total: 1.94s remaining: 1.6s 547: learn: 0.3161420 total: 1.94s remaining: 1.6s 548: learn: 0.3157992 total: 1.94s remaining: 1.6s 549: learn: 0.3152920 total: 1.95s remaining: 1.59s 550: learn: 0.3148228 total: 1.95s remaining: 1.59s 551: learn: 0.3145167 total: 1.95s remaining: 1.58s 552: learn: 0.3139810 total: 1.96s remaining: 1.58s 553: learn: 0.3133923 total: 1.96s remaining: 1.58s 554: learn: 0.3129725 total: 1.96s remaining: 1.57s 555: learn: 0.3125777 total: 1.97s remaining: 1.57s 556: learn: 0.3123591 total: 1.97s remaining: 1.57s 557: learn: 0.3118976 total: 1.97s remaining: 1.56s 558: learn: 0.3117266 total: 1.98s remaining: 1.56s 559: learn: 0.3114944 total: 1.98s remaining: 1.56s 560: learn: 0.3112216 total: 1.98s remaining: 1.55s 561: learn: 0.3108621 total: 1.99s remaining: 1.55s 562: learn: 0.3104885 total: 1.99s remaining: 1.54s 563: learn: 0.3103672 total: 1.99s remaining: 1.54s 564: learn: 0.3099546 total: 2s remaining: 1.54s 565: learn: 0.3094861 total: 2s remaining: 1.53s 566: learn: 0.3091734 total: 2s remaining: 1.53s 567: learn: 0.3089148 total: 2.01s remaining: 1.53s 568: learn: 0.3086673 total: 2.01s remaining: 1.52s 569: learn: 0.3082695 total: 2.01s remaining: 1.52s 570: learn: 0.3078891 total: 2.02s remaining: 1.52s 571: learn: 0.3074927 total: 2.02s remaining: 1.51s 572: learn: 0.3069862 total: 2.02s remaining: 1.51s 573: learn: 0.3064039 total: 2.03s remaining: 1.5s 574: learn: 0.3061550 total: 2.03s remaining: 1.5s 575: learn: 0.3056908 total: 2.04s remaining: 1.5s 576: learn: 0.3052593 total: 2.04s remaining: 1.49s 577: learn: 0.3049534 total: 2.04s remaining: 1.49s 578: learn: 0.3044688 total: 2.04s remaining: 1.49s 579: learn: 0.3041772 total: 2.05s 
remaining: 1.48s 580: learn: 0.3037533 total: 2.05s remaining: 1.48s 581: learn: 0.3035700 total: 2.06s remaining: 1.48s 582: learn: 0.3031629 total: 2.06s remaining: 1.47s 583: learn: 0.3028838 total: 2.06s remaining: 1.47s 584: learn: 0.3023030 total: 2.07s remaining: 1.47s 585: learn: 0.3019222 total: 2.07s remaining: 1.46s 586: learn: 0.3015915 total: 2.07s remaining: 1.46s 587: learn: 0.3015704 total: 2.08s remaining: 1.46s 588: learn: 0.3009768 total: 2.08s remaining: 1.45s 589: learn: 0.3008180 total: 2.08s remaining: 1.45s 590: learn: 0.3005118 total: 2.09s remaining: 1.44s 591: learn: 0.3002814 total: 2.09s remaining: 1.44s 592: learn: 0.3000431 total: 2.09s remaining: 1.44s 593: learn: 0.2997323 total: 2.1s remaining: 1.43s 594: learn: 0.2994389 total: 2.1s remaining: 1.43s 595: learn: 0.2993389 total: 2.1s remaining: 1.43s 596: learn: 0.2988775 total: 2.11s remaining: 1.42s 597: learn: 0.2987598 total: 2.11s remaining: 1.42s 598: learn: 0.2984033 total: 2.12s remaining: 1.42s 599: learn: 0.2978245 total: 2.12s remaining: 1.41s 600: learn: 0.2974830 total: 2.12s remaining: 1.41s 601: learn: 0.2969405 total: 2.13s remaining: 1.4s 602: learn: 0.2966278 total: 2.13s remaining: 1.4s 603: learn: 0.2962533 total: 2.13s remaining: 1.4s 604: learn: 0.2959855 total: 2.14s remaining: 1.39s 605: learn: 0.2956097 total: 2.14s remaining: 1.39s 606: learn: 0.2953299 total: 2.14s remaining: 1.39s 607: learn: 0.2951260 total: 2.15s remaining: 1.39s 608: learn: 0.2947087 total: 2.15s remaining: 1.38s 609: learn: 0.2944823 total: 2.16s remaining: 1.38s 610: learn: 0.2943654 total: 2.16s remaining: 1.38s 611: learn: 0.2941610 total: 2.16s remaining: 1.37s 612: learn: 0.2938108 total: 2.17s remaining: 1.37s 613: learn: 0.2934358 total: 2.17s remaining: 1.36s 614: learn: 0.2931197 total: 2.17s remaining: 1.36s 615: learn: 0.2930426 total: 2.18s remaining: 1.36s 616: learn: 0.2928295 total: 2.18s remaining: 1.35s 617: learn: 0.2924921 total: 2.18s remaining: 1.35s 618: learn: 
0.2921751 total: 2.19s remaining: 1.35s 619: learn: 0.2918991 total: 2.19s remaining: 1.34s 620: learn: 0.2913899 total: 2.19s remaining: 1.34s 621: learn: 0.2908501 total: 2.2s remaining: 1.34s 622: learn: 0.2905626 total: 2.2s remaining: 1.33s 623: learn: 0.2902904 total: 2.21s remaining: 1.33s 624: learn: 0.2900379 total: 2.21s remaining: 1.32s 625: learn: 0.2896903 total: 2.21s remaining: 1.32s 626: learn: 0.2892638 total: 2.22s remaining: 1.32s 627: learn: 0.2889578 total: 2.22s remaining: 1.31s 628: learn: 0.2887475 total: 2.22s remaining: 1.31s 629: learn: 0.2883429 total: 2.23s remaining: 1.31s 630: learn: 0.2881113 total: 2.23s remaining: 1.3s 631: learn: 0.2876183 total: 2.23s remaining: 1.3s 632: learn: 0.2875565 total: 2.24s remaining: 1.3s 633: learn: 0.2871677 total: 2.24s remaining: 1.29s 634: learn: 0.2868290 total: 2.24s remaining: 1.29s 635: learn: 0.2866803 total: 2.25s remaining: 1.29s 636: learn: 0.2864461 total: 2.25s remaining: 1.28s 637: learn: 0.2861437 total: 2.25s remaining: 1.28s 638: learn: 0.2858220 total: 2.26s remaining: 1.27s 639: learn: 0.2856065 total: 2.26s remaining: 1.27s 640: learn: 0.2853185 total: 2.26s remaining: 1.27s 641: learn: 0.2849693 total: 2.27s remaining: 1.26s 642: learn: 0.2844873 total: 2.27s remaining: 1.26s 643: learn: 0.2841484 total: 2.27s remaining: 1.26s 644: learn: 0.2838365 total: 2.28s remaining: 1.25s 645: learn: 0.2835952 total: 2.28s remaining: 1.25s 646: learn: 0.2831927 total: 2.28s remaining: 1.25s 647: learn: 0.2827722 total: 2.29s remaining: 1.24s 648: learn: 0.2824569 total: 2.29s remaining: 1.24s 649: learn: 0.2821450 total: 2.29s remaining: 1.24s 650: learn: 0.2820499 total: 2.3s remaining: 1.23s 651: learn: 0.2818476 total: 2.3s remaining: 1.23s 652: learn: 0.2815995 total: 2.31s remaining: 1.23s 653: learn: 0.2812118 total: 2.31s remaining: 1.22s 654: learn: 0.2809202 total: 2.31s remaining: 1.22s 655: learn: 0.2806815 total: 2.32s remaining: 1.22s 656: learn: 0.2802227 total: 2.32s 
remaining: 1.21s 657: learn: 0.2800624 total: 2.33s remaining: 1.21s 658: learn: 0.2795849 total: 2.33s remaining: 1.21s 659: learn: 0.2793078 total: 2.33s remaining: 1.2s 660: learn: 0.2791128 total: 2.34s remaining: 1.2s 661: learn: 0.2788399 total: 2.34s remaining: 1.19s 662: learn: 0.2788037 total: 2.34s remaining: 1.19s 663: learn: 0.2786668 total: 2.35s remaining: 1.19s 664: learn: 0.2784946 total: 2.35s remaining: 1.18s 665: learn: 0.2780830 total: 2.35s remaining: 1.18s 666: learn: 0.2778176 total: 2.36s remaining: 1.18s 667: learn: 0.2775362 total: 2.36s remaining: 1.17s 668: learn: 0.2771948 total: 2.36s remaining: 1.17s 669: learn: 0.2770151 total: 2.37s remaining: 1.17s 670: learn: 0.2766705 total: 2.37s remaining: 1.16s 671: learn: 0.2761992 total: 2.38s remaining: 1.16s 672: learn: 0.2761667 total: 2.38s remaining: 1.16s 673: learn: 0.2758894 total: 2.38s remaining: 1.15s 674: learn: 0.2754654 total: 2.39s remaining: 1.15s 675: learn: 0.2751694 total: 2.39s remaining: 1.15s 676: learn: 0.2747818 total: 2.39s remaining: 1.14s 677: learn: 0.2743284 total: 2.4s remaining: 1.14s 678: learn: 0.2738872 total: 2.4s remaining: 1.13s 679: learn: 0.2735984 total: 2.4s remaining: 1.13s 680: learn: 0.2732869 total: 2.41s remaining: 1.13s 681: learn: 0.2730972 total: 2.41s remaining: 1.12s 682: learn: 0.2728736 total: 2.41s remaining: 1.12s 683: learn: 0.2726755 total: 2.42s remaining: 1.12s 684: learn: 0.2723927 total: 2.42s remaining: 1.11s 685: learn: 0.2719884 total: 2.42s remaining: 1.11s 686: learn: 0.2715762 total: 2.43s remaining: 1.1s 687: learn: 0.2713579 total: 2.43s remaining: 1.1s 688: learn: 0.2711750 total: 2.43s remaining: 1.1s 689: learn: 0.2709947 total: 2.44s remaining: 1.09s 690: learn: 0.2707097 total: 2.44s remaining: 1.09s 691: learn: 0.2704260 total: 2.44s remaining: 1.09s 692: learn: 0.2701645 total: 2.45s remaining: 1.08s 693: learn: 0.2698393 total: 2.45s remaining: 1.08s 694: learn: 0.2695105 total: 2.45s remaining: 1.08s 695: learn: 
0.2691283 total: 2.46s remaining: 1.07s 696: learn: 0.2688368 total: 2.46s remaining: 1.07s 697: learn: 0.2683466 total: 2.46s remaining: 1.07s 698: learn: 0.2680603 total: 2.47s remaining: 1.06s 699: learn: 0.2677699 total: 2.47s remaining: 1.06s 700: learn: 0.2674562 total: 2.48s remaining: 1.05s 701: learn: 0.2671464 total: 2.48s remaining: 1.05s 702: learn: 0.2668508 total: 2.48s remaining: 1.05s 703: learn: 0.2666627 total: 2.48s remaining: 1.04s 704: learn: 0.2663736 total: 2.49s remaining: 1.04s 705: learn: 0.2660888 total: 2.49s remaining: 1.04s 706: learn: 0.2658267 total: 2.5s remaining: 1.03s 707: learn: 0.2656145 total: 2.5s remaining: 1.03s 708: learn: 0.2654194 total: 2.5s remaining: 1.03s 709: learn: 0.2651461 total: 2.51s remaining: 1.02s 710: learn: 0.2647736 total: 2.51s remaining: 1.02s 711: learn: 0.2644030 total: 2.51s remaining: 1.02s 712: learn: 0.2641493 total: 2.52s remaining: 1.01s 713: learn: 0.2636034 total: 2.52s remaining: 1.01s 714: learn: 0.2633467 total: 2.52s remaining: 1.01s 715: learn: 0.2630884 total: 2.53s remaining: 1s 716: learn: 0.2627231 total: 2.53s remaining: 999ms 717: learn: 0.2623737 total: 2.53s remaining: 995ms 718: learn: 0.2620890 total: 2.54s remaining: 992ms 719: learn: 0.2619557 total: 2.54s remaining: 988ms 720: learn: 0.2615330 total: 2.54s remaining: 985ms 721: learn: 0.2613678 total: 2.55s remaining: 982ms 722: learn: 0.2609768 total: 2.55s remaining: 978ms 723: learn: 0.2607843 total: 2.56s remaining: 974ms 724: learn: 0.2604017 total: 2.56s remaining: 971ms 725: learn: 0.2601656 total: 2.56s remaining: 968ms 726: learn: 0.2598170 total: 2.57s remaining: 964ms 727: learn: 0.2596272 total: 2.57s remaining: 960ms 728: learn: 0.2594368 total: 2.57s remaining: 957ms 729: learn: 0.2591430 total: 2.58s remaining: 953ms 730: learn: 0.2588290 total: 2.58s remaining: 950ms 731: learn: 0.2585789 total: 2.58s remaining: 946ms 732: learn: 0.2584455 total: 2.59s remaining: 943ms 733: learn: 0.2584104 total: 2.59s 
remaining: 939ms 734: learn: 0.2582152 total: 2.59s remaining: 935ms 735: learn: 0.2581397 total: 2.6s remaining: 932ms 736: learn: 0.2578583 total: 2.6s remaining: 929ms 737: learn: 0.2576197 total: 2.6s remaining: 925ms 738: learn: 0.2573741 total: 2.61s remaining: 922ms 739: learn: 0.2571082 total: 2.61s remaining: 918ms 740: learn: 0.2568704 total: 2.62s remaining: 915ms 741: learn: 0.2566573 total: 2.62s remaining: 911ms 742: learn: 0.2564107 total: 2.62s remaining: 907ms 743: learn: 0.2561631 total: 2.63s remaining: 904ms 744: learn: 0.2561312 total: 2.63s remaining: 901ms 745: learn: 0.2557099 total: 2.63s remaining: 897ms 746: learn: 0.2554609 total: 2.64s remaining: 894ms 747: learn: 0.2552453 total: 2.64s remaining: 890ms 748: learn: 0.2549054 total: 2.65s remaining: 887ms 749: learn: 0.2546371 total: 2.65s remaining: 884ms 750: learn: 0.2543651 total: 2.65s remaining: 880ms 751: learn: 0.2537761 total: 2.66s remaining: 876ms 752: learn: 0.2535814 total: 2.66s remaining: 873ms 753: learn: 0.2531956 total: 2.67s remaining: 870ms 754: learn: 0.2531389 total: 2.67s remaining: 866ms 755: learn: 0.2529298 total: 2.67s remaining: 862ms 756: learn: 0.2527376 total: 2.68s remaining: 859ms 757: learn: 0.2524734 total: 2.68s remaining: 856ms 758: learn: 0.2521595 total: 2.68s remaining: 852ms 759: learn: 0.2519621 total: 2.69s remaining: 849ms 760: learn: 0.2516846 total: 2.69s remaining: 846ms 761: learn: 0.2512891 total: 2.7s remaining: 842ms 762: learn: 0.2510728 total: 2.7s remaining: 839ms 763: learn: 0.2507176 total: 2.7s remaining: 835ms 764: learn: 0.2503362 total: 2.71s remaining: 832ms 765: learn: 0.2502037 total: 2.71s remaining: 829ms 766: learn: 0.2498936 total: 2.71s remaining: 825ms 767: learn: 0.2496633 total: 2.72s remaining: 821ms 768: learn: 0.2493097 total: 2.72s remaining: 818ms 769: learn: 0.2489150 total: 2.73s remaining: 815ms 770: learn: 0.2486851 total: 2.73s remaining: 811ms 771: learn: 0.2483194 total: 2.73s remaining: 807ms 772: learn: 
0.2479421 total: 2.74s remaining: 804ms 773: learn: 0.2478561 total: 2.74s remaining: 801ms 774: learn: 0.2475209 total: 2.75s remaining: 797ms 775: learn: 0.2471628 total: 2.75s remaining: 794ms 776: learn: 0.2469314 total: 2.75s remaining: 790ms 777: learn: 0.2466425 total: 2.76s remaining: 787ms 778: learn: 0.2465065 total: 2.76s remaining: 783ms 779: learn: 0.2461220 total: 2.76s remaining: 780ms 780: learn: 0.2458115 total: 2.77s remaining: 776ms 781: learn: 0.2456707 total: 2.77s remaining: 773ms 782: learn: 0.2454929 total: 2.77s remaining: 769ms 783: learn: 0.2451706 total: 2.78s remaining: 766ms 784: learn: 0.2448523 total: 2.78s remaining: 762ms 785: learn: 0.2445772 total: 2.79s remaining: 759ms 786: learn: 0.2443875 total: 2.79s remaining: 755ms 787: learn: 0.2441339 total: 2.79s remaining: 751ms 788: learn: 0.2438773 total: 2.8s remaining: 748ms 789: learn: 0.2438140 total: 2.8s remaining: 744ms 790: learn: 0.2434810 total: 2.8s remaining: 741ms 791: learn: 0.2429906 total: 2.81s remaining: 737ms 792: learn: 0.2426931 total: 2.81s remaining: 734ms 793: learn: 0.2424043 total: 2.81s remaining: 730ms 794: learn: 0.2421210 total: 2.82s remaining: 726ms 795: learn: 0.2419921 total: 2.82s remaining: 723ms 796: learn: 0.2418543 total: 2.82s remaining: 719ms 797: learn: 0.2418231 total: 2.83s remaining: 716ms 798: learn: 0.2415865 total: 2.83s remaining: 712ms 799: learn: 0.2412327 total: 2.83s remaining: 709ms 800: learn: 0.2410130 total: 2.84s remaining: 705ms 801: learn: 0.2406898 total: 2.84s remaining: 702ms 802: learn: 0.2403997 total: 2.85s remaining: 698ms 803: learn: 0.2403092 total: 2.85s remaining: 694ms 804: learn: 0.2399602 total: 2.85s remaining: 691ms 805: learn: 0.2397046 total: 2.85s remaining: 687ms 806: learn: 0.2395811 total: 2.86s remaining: 684ms 807: learn: 0.2393554 total: 2.86s remaining: 680ms 808: learn: 0.2389797 total: 2.87s remaining: 677ms 809: learn: 0.2386822 total: 2.87s remaining: 673ms 810: learn: 0.2385272 total: 2.87s 
remaining: 669ms 811: learn: 0.2383253 total: 2.88s remaining: 666ms 812: learn: 0.2382769 total: 2.88s remaining: 662ms 813: learn: 0.2380034 total: 2.88s remaining: 659ms 814: learn: 0.2378123 total: 2.88s remaining: 655ms 815: learn: 0.2376812 total: 2.89s remaining: 651ms 816: learn: 0.2375900 total: 2.89s remaining: 648ms 817: learn: 0.2373746 total: 2.9s remaining: 644ms 818: learn: 0.2368950 total: 2.9s remaining: 641ms 819: learn: 0.2365626 total: 2.9s remaining: 637ms 820: learn: 0.2364160 total: 2.91s remaining: 634ms 821: learn: 0.2362066 total: 2.91s remaining: 630ms 822: learn: 0.2357433 total: 2.91s remaining: 627ms 823: learn: 0.2355107 total: 2.92s remaining: 623ms 824: learn: 0.2353031 total: 2.92s remaining: 620ms 825: learn: 0.2349189 total: 2.92s remaining: 616ms 826: learn: 0.2345722 total: 2.93s remaining: 612ms 827: learn: 0.2342914 total: 2.93s remaining: 609ms 828: learn: 0.2340122 total: 2.93s remaining: 605ms 829: learn: 0.2337871 total: 2.94s remaining: 602ms 830: learn: 0.2337141 total: 2.94s remaining: 598ms 831: learn: 0.2334017 total: 2.94s remaining: 595ms 832: learn: 0.2331498 total: 2.95s remaining: 591ms 833: learn: 0.2329182 total: 2.95s remaining: 588ms 834: learn: 0.2326496 total: 2.96s remaining: 584ms 835: learn: 0.2323721 total: 2.96s remaining: 580ms 836: learn: 0.2321663 total: 2.96s remaining: 577ms 837: learn: 0.2317915 total: 2.96s remaining: 573ms 838: learn: 0.2317042 total: 2.97s remaining: 570ms 839: learn: 0.2315415 total: 2.97s remaining: 566ms 840: learn: 0.2312144 total: 2.98s remaining: 563ms 841: learn: 0.2309861 total: 2.98s remaining: 559ms 842: learn: 0.2308992 total: 2.98s remaining: 556ms 843: learn: 0.2307248 total: 2.99s remaining: 552ms 844: learn: 0.2304123 total: 2.99s remaining: 548ms 845: learn: 0.2302900 total: 2.99s remaining: 545ms 846: learn: 0.2299298 total: 3s remaining: 541ms 847: learn: 0.2297649 total: 3s remaining: 538ms 848: learn: 0.2296314 total: 3s remaining: 534ms 849: learn: 
0.2292939 total: 3.01s remaining: 531ms 850: learn: 0.2291602 total: 3.01s remaining: 527ms 851: learn: 0.2288865 total: 3.01s remaining: 523ms 852: learn: 0.2286853 total: 3.02s remaining: 520ms 853: learn: 0.2285428 total: 3.02s remaining: 516ms 854: learn: 0.2284197 total: 3.02s remaining: 513ms 855: learn: 0.2282277 total: 3.03s remaining: 509ms 856: learn: 0.2278907 total: 3.03s remaining: 506ms 857: learn: 0.2277383 total: 3.03s remaining: 502ms 858: learn: 0.2275067 total: 3.04s remaining: 499ms 859: learn: 0.2272363 total: 3.04s remaining: 495ms 860: learn: 0.2269635 total: 3.04s remaining: 492ms 861: learn: 0.2265979 total: 3.05s remaining: 488ms 862: learn: 0.2264905 total: 3.05s remaining: 484ms 863: learn: 0.2262386 total: 3.06s remaining: 481ms 864: learn: 0.2260311 total: 3.06s remaining: 477ms 865: learn: 0.2258230 total: 3.06s remaining: 474ms 866: learn: 0.2255610 total: 3.06s remaining: 470ms 867: learn: 0.2254607 total: 3.07s remaining: 467ms 868: learn: 0.2250433 total: 3.07s remaining: 463ms 869: learn: 0.2249648 total: 3.08s remaining: 460ms 870: learn: 0.2247922 total: 3.08s remaining: 456ms 871: learn: 0.2244233 total: 3.08s remaining: 452ms 872: learn: 0.2242406 total: 3.08s remaining: 449ms 873: learn: 0.2240229 total: 3.09s remaining: 445ms 874: learn: 0.2238258 total: 3.09s remaining: 442ms 875: learn: 0.2237571 total: 3.1s remaining: 438ms 876: learn: 0.2235548 total: 3.1s remaining: 435ms 877: learn: 0.2234135 total: 3.1s remaining: 431ms 878: learn: 0.2230922 total: 3.11s remaining: 428ms 879: learn: 0.2226509 total: 3.11s remaining: 424ms 880: learn: 0.2224404 total: 3.11s remaining: 421ms 881: learn: 0.2223705 total: 3.12s remaining: 417ms 882: learn: 0.2223260 total: 3.12s remaining: 414ms 883: learn: 0.2220318 total: 3.13s remaining: 410ms 884: learn: 0.2218552 total: 3.13s remaining: 407ms 885: learn: 0.2217687 total: 3.13s remaining: 403ms 886: learn: 0.2214940 total: 3.13s remaining: 399ms 887: learn: 0.2213258 total: 3.14s 
remaining: 396ms 888: learn: 0.2212884 total: 3.14s remaining: 392ms 889: learn: 0.2210636 total: 3.15s remaining: 389ms 890: learn: 0.2208620 total: 3.15s remaining: 385ms 891: learn: 0.2207268 total: 3.15s remaining: 382ms 892: learn: 0.2205061 total: 3.16s remaining: 378ms 893: learn: 0.2203278 total: 3.16s remaining: 375ms 894: learn: 0.2200449 total: 3.16s remaining: 371ms 895: learn: 0.2197147 total: 3.17s remaining: 368ms 896: learn: 0.2195113 total: 3.17s remaining: 364ms 897: learn: 0.2192671 total: 3.17s remaining: 360ms 898: learn: 0.2192258 total: 3.18s remaining: 357ms 899: learn: 0.2189515 total: 3.18s remaining: 353ms 900: learn: 0.2186505 total: 3.18s remaining: 350ms 901: learn: 0.2184193 total: 3.19s remaining: 346ms 902: learn: 0.2183672 total: 3.19s remaining: 343ms 903: learn: 0.2181273 total: 3.19s remaining: 339ms 904: learn: 0.2178674 total: 3.2s remaining: 336ms 905: learn: 0.2176005 total: 3.2s remaining: 332ms 906: learn: 0.2174875 total: 3.2s remaining: 329ms 907: learn: 0.2171777 total: 3.21s remaining: 325ms 908: learn: 0.2169544 total: 3.21s remaining: 322ms 909: learn: 0.2167341 total: 3.22s remaining: 318ms 910: learn: 0.2166133 total: 3.22s remaining: 315ms 911: learn: 0.2164535 total: 3.22s remaining: 311ms 912: learn: 0.2164128 total: 3.23s remaining: 307ms 913: learn: 0.2162686 total: 3.23s remaining: 304ms 914: learn: 0.2160413 total: 3.23s remaining: 300ms 915: learn: 0.2158222 total: 3.24s remaining: 297ms 916: learn: 0.2157209 total: 3.24s remaining: 293ms 917: learn: 0.2155155 total: 3.24s remaining: 290ms 918: learn: 0.2154142 total: 3.25s remaining: 286ms 919: learn: 0.2151079 total: 3.25s remaining: 283ms 920: learn: 0.2149705 total: 3.25s remaining: 279ms 921: learn: 0.2149322 total: 3.26s remaining: 276ms 922: learn: 0.2148807 total: 3.26s remaining: 272ms 923: learn: 0.2146427 total: 3.27s remaining: 269ms 924: learn: 0.2145465 total: 3.27s remaining: 265ms 925: learn: 0.2143896 total: 3.27s remaining: 261ms 926: 
learn: 0.2141042 total: 3.27s remaining: 258ms 927: learn: 0.2140653 total: 3.28s remaining: 254ms 928: learn: 0.2138073 total: 3.28s remaining: 251ms 929: learn: 0.2135790 total: 3.29s remaining: 247ms 930: learn: 0.2135487 total: 3.29s remaining: 244ms 931: learn: 0.2134147 total: 3.29s remaining: 240ms 932: learn: 0.2132543 total: 3.3s remaining: 237ms 933: learn: 0.2131319 total: 3.3s remaining: 233ms 934: learn: 0.2128832 total: 3.3s remaining: 230ms 935: learn: 0.2125868 total: 3.31s remaining: 226ms 936: learn: 0.2124035 total: 3.31s remaining: 223ms 937: learn: 0.2121759 total: 3.31s remaining: 219ms 938: learn: 0.2120445 total: 3.32s remaining: 216ms 939: learn: 0.2118077 total: 3.32s remaining: 212ms 940: learn: 0.2114668 total: 3.33s remaining: 208ms 941: learn: 0.2111545 total: 3.33s remaining: 205ms 942: learn: 0.2111246 total: 3.33s remaining: 201ms 943: learn: 0.2109490 total: 3.33s remaining: 198ms 944: learn: 0.2107927 total: 3.34s remaining: 194ms 945: learn: 0.2105585 total: 3.34s remaining: 191ms 946: learn: 0.2105239 total: 3.35s remaining: 187ms 947: learn: 0.2104112 total: 3.35s remaining: 184ms 948: learn: 0.2101941 total: 3.35s remaining: 180ms 949: learn: 0.2101563 total: 3.36s remaining: 177ms 950: learn: 0.2101322 total: 3.36s remaining: 173ms 951: learn: 0.2098230 total: 3.36s remaining: 170ms 952: learn: 0.2097301 total: 3.37s remaining: 166ms 953: learn: 0.2095164 total: 3.37s remaining: 163ms 954: learn: 0.2093993 total: 3.37s remaining: 159ms 955: learn: 0.2091224 total: 3.38s remaining: 155ms 956: learn: 0.2089994 total: 3.38s remaining: 152ms 957: learn: 0.2088083 total: 3.38s remaining: 148ms 958: learn: 0.2087013 total: 3.39s remaining: 145ms 959: learn: 0.2084003 total: 3.39s remaining: 141ms 960: learn: 0.2082851 total: 3.4s remaining: 138ms 961: learn: 0.2080820 total: 3.4s remaining: 134ms 962: learn: 0.2078811 total: 3.4s remaining: 131ms 963: learn: 0.2076056 total: 3.4s remaining: 127ms 964: learn: 0.2074108 total: 3.41s 
remaining: 124ms 965: learn: 0.2070406 total: 3.41s remaining: 120ms 966: learn: 0.2067699 total: 3.42s remaining: 117ms 967: learn: 0.2063896 total: 3.42s remaining: 113ms 968: learn: 0.2060907 total: 3.42s remaining: 110ms 969: learn: 0.2059021 total: 3.43s remaining: 106ms 970: learn: 0.2056345 total: 3.43s remaining: 102ms 971: learn: 0.2053408 total: 3.43s remaining: 98.9ms 972: learn: 0.2050676 total: 3.44s remaining: 95.4ms 973: learn: 0.2048304 total: 3.44s remaining: 91.9ms 974: learn: 0.2046749 total: 3.44s remaining: 88.3ms 975: learn: 0.2044718 total: 3.45s remaining: 84.8ms 976: learn: 0.2041876 total: 3.45s remaining: 81.3ms 977: learn: 0.2039090 total: 3.46s remaining: 77.7ms 978: learn: 0.2037805 total: 3.46s remaining: 74.2ms 979: learn: 0.2034705 total: 3.46s remaining: 70.7ms 980: learn: 0.2032025 total: 3.47s remaining: 67.1ms 981: learn: 0.2030209 total: 3.47s remaining: 63.6ms 982: learn: 0.2027762 total: 3.47s remaining: 60.1ms 983: learn: 0.2026031 total: 3.48s remaining: 56.5ms 984: learn: 0.2023929 total: 3.48s remaining: 53ms 985: learn: 0.2021011 total: 3.48s remaining: 49.5ms 986: learn: 0.2018960 total: 3.49s remaining: 45.9ms 987: learn: 0.2017884 total: 3.49s remaining: 42.4ms 988: learn: 0.2014352 total: 3.49s remaining: 38.9ms 989: learn: 0.2012731 total: 3.5s remaining: 35.3ms 990: learn: 0.2010667 total: 3.5s remaining: 31.8ms 991: learn: 0.2008429 total: 3.5s remaining: 28.3ms 992: learn: 0.2006669 total: 3.51s remaining: 24.7ms 993: learn: 0.2005137 total: 3.51s remaining: 21.2ms 994: learn: 0.2001325 total: 3.52s remaining: 17.7ms 995: learn: 0.1999855 total: 3.52s remaining: 14.1ms 996: learn: 0.1996729 total: 3.52s remaining: 10.6ms 997: learn: 0.1994904 total: 3.52s remaining: 7.06ms 998: learn: 0.1993044 total: 3.53s remaining: 3.53ms 999: learn: 0.1989547 total: 3.53s remaining: 0us
# Echo the model-comparison table (MSE / MAE / R2 / training time per model).
df_compare
| Model | Mean Squared Error | Mean Absolute Error | R2 Score | Training Time (s) | |
|---|---|---|---|---|---|
| 0 | Ridge Regression | 0.746614 | 0.598242 | 0.98175 | 0.002362 |
| 1 | Linear Regression | 0.77558 | 0.613905 | 0.981042 | 0.008197 |
| 2 | Lasso Regression | 1.066347 | 0.782536 | 0.973934 | 0.089346 |
| 3 | Elastic Net | 1.070319 | 0.718474 | 0.973837 | 0.100495 |
| 4 | Gradient Boosting | 87.40121 | 7.495735 | -1.136431 | 2.311828 |
| 5 | AdaBoost | 89.525725 | 7.622743 | -1.188363 | 0.722531 |
| 6 | Decision Tree | 91.434513 | 7.735897 | -1.235021 | 0.077579 |
| 7 | Random Forest | 92.824516 | 7.815107 | -1.268998 | 4.073694 |
| 11 | CatBoost | 93.180853 | 7.853729 | -1.277709 | 3.699676 |
| 8 | XGBoost | 94.682514 | 7.893578 | -1.314415 | 0.462786 |
| 9 | K-Neighbors Regressor | 330.959365 | 16.750109 | -7.089956 | 0.000574 |
| 10 | SVR | 349.552891 | 17.672316 | -7.544455 | 0.142434 |
We trained a variety of regression models to predict stock prices, including Linear Regression, Ridge Regression, Lasso Regression, Elastic Net, Support Vector Regression (SVR), K-Neighbors Regressor, Decision Tree, Random Forest, Gradient Boosting, AdaBoost, XGBoost, and CatBoost. The comparison table above reports several evaluation metrics for each model, which together give a rounded picture of predictive performance. Let's break down what each metric means and why it matters:
# Fit an ordinary least-squares baseline on the scaled feature matrix.
lr_model_base = LinearRegression().fit(X_train_scaled, y_train)

# Predict on the scaled hold-out set.
lr_pred_base = lr_model_base.predict(X_test_scaled)

# Collect dates, actuals, and predictions side by side for later comparison.
# NOTE(review): assumes the test split is exactly the rows with year >= 2020 —
# the date column is pulled from `df` by that filter, not from the split itself.
prediction_df = pd.DataFrame()
prediction_df['date'] = df.loc[df.date.dt.year >= 2020, 'date']
prediction_df['y_test'] = y_test
prediction_df['lr_pred_base'] = lr_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | |
|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 |
# Score the baseline linear model; the helper prints MSE/RMSE/MAE/R2
# (see output below) and returns them as a dict.
lr_score_base = evaluate_regression_model(y_test, lr_pred_base)
Mean Squared Error (MSE): 0.776 Root Mean Squared Error (RMSE): 0.881 Mean Absolute Error (MAE): 0.614 R-squared (R2): 0.981
# Echo the metrics dict for the baseline linear model.
lr_score_base
{'MSE': 0.7755799343709945,
'RMSE': 0.880670162076015,
'MAE': 0.6139047642970673,
'R2': 0.9810417591179742}
# Visual diagnostics for the baseline linear model (project helpers).
plot_regression_accuracy(y_test, lr_pred_base)
plot_predictions(df,lr_pred_base)
# Rank features (presumably by coefficient magnitude — confirm against the
# helper's definition) and keep the top-20 table for feature selection below.
lr_base_feature_importance = plot_feature_importance(lr_model_base,X_train,20)
lr_base_feature_importance[:15]
| Feature | Importance | |
|---|---|---|
| 0 | adj close_10d_avg | 59.927343 |
| 1 | close_10d_avg | 52.842967 |
| 2 | close_15d_avg | 32.217386 |
| 3 | ema_9 | 31.326586 |
| 4 | adj close_15d_avg | 29.982687 |
| 5 | close_5d_avg | 17.929065 |
| 6 | sma_5 | 13.495880 |
| 7 | adj close_5d_avg | 12.461717 |
| 8 | adj close_3d_avg | 10.395039 |
| 9 | close_1d_ago | 9.861051 |
| 10 | adj close_7d_ago | 9.733592 |
| 11 | adj close_1d_ago | 9.044414 |
| 12 | close_7d_ago | 7.605479 |
| 13 | sma_15 | 6.911237 |
| 14 | adj close_14d_ago | 5.986732 |
# Keep the 20 most important features from the baseline model, then rescale
# and refit a linear regression on the reduced feature set.
keep_cols20 = lr_base_feature_importance.head(20)['Feature'].tolist()
X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]

# Fit the scaler on the training subset only to avoid test-set leakage.
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Refit and score the reduced model on the hold-out set.
lr_model20 = LinearRegression().fit(X_train_scaled20, y_train)
lr_pred20 = lr_model20.predict(X_test_scaled20)
lr_score20 = evaluate_regression_model(y_test, lr_pred20)
Mean Squared Error (MSE): 0.768 Root Mean Squared Error (RMSE): 0.877 Mean Absolute Error (MAE): 0.613 R-squared (R2): 0.981
# Record the 20-feature predictions next to the earlier columns.
prediction_df['lr_pred20'] = lr_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | |
|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 |
# Echo the metrics dict for the 20-feature linear model.
lr_score20
{'MSE': 0.7682885573217368,
'RMSE': 0.8765207112908039,
'MAE': 0.6133462584657988,
'R2': 0.9812199892092073}
# Re-rank importances within the reduced 20-feature model.
plot_feature_importance(lr_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | adj close_10d_avg | 39.701594 |
| 1 | close_10d_avg | 34.492844 |
| 2 | ema_9 | 16.380613 |
| 3 | adj close_7d_avg | 14.543602 |
| 4 | sma_5 | 11.032622 |
| 5 | close_15d_avg | 10.687774 |
| 6 | sma_15 | 9.954755 |
| 7 | adj close_5d_avg | 8.830203 |
| 8 | adj close_7d_ago | 8.207036 |
| 9 | adj close_15d_avg | 7.303476 |
| 10 | close_7d_ago | 7.068989 |
| 11 | adj close_1d_ago | 6.587839 |
| 12 | close_5d_avg | 6.427914 |
| 13 | close_1d_ago | 4.837201 |
| 14 | adj close_3d_avg | 4.708264 |
| 15 | close_3d_ago | 4.101885 |
| 16 | adj close_3d_ago | 3.394063 |
| 17 | low_5d_avg | 1.134743 |
| 18 | low_10d_avg | 0.507845 |
| 19 | adj close_14d_ago | 0.220884 |
# Repeat the feature-selection experiment with only the top 15 features
# from the baseline importance ranking.
keep_cols15 = lr_base_feature_importance.head(15)['Feature'].tolist()
X_train15 = X_train[keep_cols15]
X_test15 = X_test[keep_cols15]

# Fit the scaler on the training subset only to avoid test-set leakage.
scaler = StandardScaler()
X_train_scaled15 = scaler.fit_transform(X_train15)
X_test_scaled15 = scaler.transform(X_test15)

# Refit and score the 15-feature model on the hold-out set.
lr_model15 = LinearRegression().fit(X_train_scaled15, y_train)
lr_pred15 = lr_model15.predict(X_test_scaled15)
lr_score15 = evaluate_regression_model(y_test, lr_pred15)
Mean Squared Error (MSE): 0.797 Root Mean Squared Error (RMSE): 0.893 Mean Absolute Error (MAE): 0.626 R-squared (R2): 0.981
# Record the 15-feature predictions next to the earlier columns.
prediction_df['lr_pred15'] = lr_pred15
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | |
|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 |
# Echo the metrics dict for the 15-feature linear model.
lr_score15
{'MSE': 0.7968925999367115,
'RMSE': 0.8926884114497686,
'MAE': 0.6261544545364333,
'R2': 0.9805207932836008}
# Re-rank importances within the reduced 15-feature model.
plot_feature_importance(lr_model15,X_train15,15)
| Feature | Importance | |
|---|---|---|
| 0 | close_10d_avg | 29.471317 |
| 1 | adj close_10d_avg | 24.455782 |
| 2 | ema_9 | 21.480216 |
| 3 | adj close_5d_avg | 16.328249 |
| 4 | adj close_1d_ago | 10.320838 |
| 5 | adj close_3d_avg | 9.162211 |
| 6 | close_1d_ago | 7.864891 |
| 7 | adj close_7d_ago | 7.162200 |
| 8 | sma_15 | 6.734176 |
| 9 | close_7d_ago | 6.356886 |
| 10 | close_5d_avg | 4.707638 |
| 11 | close_15d_avg | 3.343602 |
| 12 | sma_5 | 2.379653 |
| 13 | adj close_15d_avg | 2.216233 |
| 14 | adj close_14d_ago | 0.415949 |
# Repeat the feature-selection experiment with only the top 10 features
# from the baseline importance ranking.
keep_cols10 = lr_base_feature_importance.head(10)['Feature'].tolist()
X_train10 = X_train[keep_cols10]
X_test10 = X_test[keep_cols10]

# Fit the scaler on the training subset only to avoid test-set leakage.
scaler = StandardScaler()
X_train_scaled10 = scaler.fit_transform(X_train10)
X_test_scaled10 = scaler.transform(X_test10)

# Refit and score the 10-feature model on the hold-out set.
lr_model10 = LinearRegression().fit(X_train_scaled10, y_train)
lr_pred10 = lr_model10.predict(X_test_scaled10)
lr_score10 = evaluate_regression_model(y_test, lr_pred10)
Mean Squared Error (MSE): 0.786 Root Mean Squared Error (RMSE): 0.887 Mean Absolute Error (MAE): 0.626 R-squared (R2): 0.981
# Record the 10-feature predictions next to the earlier columns.
prediction_df['lr_pred10'] = lr_pred10
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | |
|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 |
# Echo the metrics dict for the 10-feature linear model.
lr_score10
{'MSE': 0.7864993593280865,
'RMSE': 0.8868479911056271,
'MAE': 0.6260216655405225,
'R2': 0.9807748451875646}
# Re-rank importances within the reduced 10-feature model.
plot_feature_importance(lr_model10,X_train10,10)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 16.842727 |
| 1 | close_5d_avg | 14.158982 |
| 2 | adj close_3d_avg | 8.615665 |
| 3 | adj close_5d_avg | 6.493537 |
| 4 | close_15d_avg | 6.467632 |
| 5 | close_10d_avg | 6.069412 |
| 6 | sma_5 | 3.130954 |
| 7 | adj close_10d_avg | 1.666197 |
| 8 | close_1d_ago | 0.905094 |
| 9 | adj close_15d_avg | 0.029654 |
# Tune the Ridge regularization strength over a log-spaced grid with
# 5-fold cross-validation, scored by (negative) mean squared error.
ridge_model = Ridge()
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_search = GridSearchCV(estimator=ridge_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

# GridSearchCV refits the best estimator on the full training set by default.
best_ridge_model = grid_search.best_estimator_

# Predict and evaluate on the hold-out set.
ridge_pred_base = best_ridge_model.predict(X_test_scaled)
mse = mean_squared_error(y_test, ridge_pred_base)
# FIX: the `squared=False` argument to mean_squared_error is deprecated and
# removed in scikit-learn 1.6; derive the RMSE from the MSE instead.
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, ridge_pred_base)
r2 = r2_score(y_test, ridge_pred_base)

print("Best Ridge Regression Model:")
print(f"Best alpha: {best_ridge_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")

# Keep the metrics in the same dict shape evaluate_regression_model returns,
# so the ridge results compare directly with the other models.
ridge_score = {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'R2': r2
}
Best Ridge Regression Model: Best alpha: 0.001 Root Mean Squared Error (RMSE): 0.871 Mean Squared Error: 0.759 Mean Absolute Error: 0.606 R2 Score: 0.981
# Rank the tuned Ridge model's features and keep the top-20 table
# for the feature-selection step below.
ridge_base_feature_importance = plot_feature_importance(best_ridge_model,X_train,20)
ridge_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | close_5d_avg | 19.627386 |
| 1 | ema_9 | 18.897025 |
| 2 | sma_5 | 18.403314 |
| 3 | adj close_5d_avg | 11.844801 |
| 4 | close_10d_avg | 9.872194 |
| 5 | adj close_10d_avg | 9.389002 |
| 6 | close_15d_avg | 7.887685 |
| 7 | close_1d_ago | 7.038813 |
| 8 | close_7d_avg | 6.958126 |
| 9 | adj close_1d_ago | 6.600528 |
| 10 | close_5d_ago | 6.008206 |
| 11 | sma_15 | 5.538994 |
| 12 | low_5d_avg | 4.965283 |
| 13 | low_10d_avg | 4.840114 |
| 14 | open_5d_avg | 4.446916 |
| 15 | high_5d_avg | 3.714418 |
| 16 | open_10d_avg | 3.486739 |
| 17 | high_30d_avg | 2.896473 |
| 18 | close_3d_ago | 2.893431 |
| 19 | open_15d_avg | 2.892245 |
# Record the tuned-Ridge predictions next to the earlier columns.
prediction_df['ridge_pred_base'] = ridge_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | |
|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 |
# Keep the 20 most important features from the tuned Ridge model, then
# rescale and refit a Ridge regression (best alpha from the grid search)
# on the reduced feature set.
keep_cols20 = ridge_base_feature_importance.head(20)['Feature'].tolist()
X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]

# Fit the scaler on the training subset only to avoid test-set leakage.
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Refit and score the reduced model on the hold-out set.
ridge_model20 = Ridge(alpha=0.001).fit(X_train_scaled20, y_train)
ridge_pred20 = ridge_model20.predict(X_test_scaled20)
ridge_score20 = evaluate_regression_model(y_test, ridge_pred20)
Mean Squared Error (MSE): 0.73 Root Mean Squared Error (RMSE): 0.854 Mean Absolute Error (MAE): 0.586 R-squared (R2): 0.982
# Re-rank importances within the reduced 20-feature Ridge model.
plot_feature_importance(ridge_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | sma_5 | 25.441510 |
| 1 | close_5d_avg | 22.435160 |
| 2 | adj close_5d_avg | 11.490239 |
| 3 | adj close_1d_ago | 5.671591 |
| 4 | adj close_10d_avg | 5.507894 |
| 5 | close_5d_ago | 5.147037 |
| 6 | close_1d_ago | 4.554521 |
| 7 | close_10d_avg | 4.509604 |
| 8 | close_15d_avg | 3.719596 |
| 9 | low_5d_avg | 3.022988 |
| 10 | sma_15 | 2.915010 |
| 11 | ema_9 | 2.899160 |
| 12 | low_10d_avg | 1.985008 |
| 13 | high_5d_avg | 1.745675 |
| 14 | open_10d_avg | 1.590243 |
| 15 | open_5d_avg | 1.561036 |
| 16 | open_15d_avg | 1.264441 |
| 17 | close_3d_ago | 0.798379 |
| 18 | close_7d_avg | 0.640924 |
| 19 | high_30d_avg | 0.200250 |
# Store the reduced-feature Ridge predictions for later comparison.
prediction_df['ridge_pred20'] = ridge_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | |
|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 |
# Tune Lasso's regularization strength with 5-fold cross-validated grid search.
lasso_model = Lasso()
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)
# Get the best model
best_lasso_model = grid_search.best_estimator_
# Make predictions on the test set
lasso_pred_base = best_lasso_model.predict(X_test_scaled)
# Evaluate the best model.  RMSE is derived as sqrt(MSE) because
# mean_squared_error's `squared=False` option is deprecated and removed
# in scikit-learn 1.6.
mse = mean_squared_error(y_test, lasso_pred_base)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, lasso_pred_base)
r2 = r2_score(y_test, lasso_pred_base)
print("Best Lasso Regression Model:")
print(f"Best alpha: {best_lasso_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")
# Keep the metrics for the model-comparison table built later.
lasso_score = {
    'MSE': mse,
    'RMSE': rmse,
    'MAE': mae,
    'R2': r2
}
Best Lasso Regression Model: Best alpha: 0.001 Root Mean Squared Error (RMSE): 0.97 Mean Squared Error: 0.94 Mean Absolute Error: 0.663 R2 Score: 0.977
# Feature importances (coefficient magnitudes) of the tuned Lasso model.
lasso_base_feature_importance = plot_feature_importance(best_lasso_model,X_train,20)
lasso_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.177865 |
| 1 | close_3d_avg | 1.314731 |
| 2 | macd | 1.312350 |
| 3 | macd_signal | 1.160117 |
| 4 | sma_15 | 1.006266 |
| 5 | adj close_3d_avg | 0.822478 |
| 6 | low_1d_ago | 0.722646 |
| 7 | close_3d_ago | 0.583764 |
| 8 | rsi | 0.543082 |
| 9 | open_3d_ago | 0.510853 |
| 10 | sma_30 | 0.492814 |
| 11 | high_1d_ago | 0.466770 |
| 12 | adj close_3d_ago | 0.422750 |
| 13 | low_15d_avg | 0.404237 |
| 14 | open_1d_ago | 0.365671 |
| 15 | high_14d_ago | 0.307277 |
| 16 | low_30d_avg | 0.291303 |
| 17 | sma_10 | 0.255037 |
| 18 | open_30d_avg | 0.241245 |
| 19 | high_30d_avg | 0.227913 |
# Store the baseline Lasso predictions for later comparison.
prediction_df['lasso_pred_base'] = lasso_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | |
|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 |
# Reduce to the 20 strongest Lasso features and refit with the tuned alpha.
keep_cols20 = lasso_base_feature_importance.head(20)['Feature'].tolist()
X_train20, X_test20 = X_train[keep_cols20], X_test[keep_cols20]

# Standardize using training-split statistics only.
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Fit, predict on the held-out set, and record the metrics.
lasso_model20 = Lasso(alpha=0.001)
lasso_model20.fit(X_train_scaled20, y_train)
lasso_pred20 = lasso_model20.predict(X_test_scaled20)
lasso_score20 = evaluate_regression_model(y_test, lasso_pred20)
Mean Squared Error (MSE): 0.948 Root Mean Squared Error (RMSE): 0.974 Mean Absolute Error (MAE): 0.665 R-squared (R2): 0.977
plot_feature_importance(lasso_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.017287 |
| 1 | close_3d_avg | 2.790781 |
| 2 | low_1d_ago | 1.047406 |
| 3 | close_3d_ago | 1.034968 |
| 4 | sma_30 | 1.029511 |
| 5 | low_15d_avg | 0.997798 |
| 6 | macd | 0.968169 |
| 7 | macd_signal | 0.880009 |
| 8 | high_14d_ago | 0.568316 |
| 9 | high_1d_ago | 0.533765 |
| 10 | rsi | 0.496376 |
| 11 | sma_10 | 0.423288 |
| 12 | adj close_3d_avg | 0.228550 |
| 13 | open_3d_ago | 0.204478 |
| 14 | open_1d_ago | 0.139447 |
| 15 | low_30d_avg | 0.025033 |
| 16 | sma_15 | 0.000000 |
| 17 | adj close_3d_ago | 0.000000 |
| 18 | open_30d_avg | 0.000000 |
| 19 | high_30d_avg | 0.000000 |
# Store the reduced-feature Lasso predictions for later comparison.
prediction_df['lasso_pred20'] = lasso_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 |
# Tune Elastic Net's alpha and L1/L2 mix with 5-fold cross-validated grid search.
elastic_net_model = ElasticNet()
# Define the hyperparameter grid to search
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
}
# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=elastic_net_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)
# Get the best model
best_elastic_net_model = grid_search.best_estimator_
# Make predictions on the test set
elastic_pred_base = best_elastic_net_model.predict(X_test_scaled)
# Evaluate the best model.  RMSE is derived as sqrt(MSE) because
# mean_squared_error's `squared=False` option is deprecated and removed
# in scikit-learn 1.6.
mse = mean_squared_error(y_test, elastic_pred_base)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, elastic_pred_base)
r2 = r2_score(y_test, elastic_pred_base)
print("Best Elastic Net Model:")
print(f"Best alpha: {best_elastic_net_model.alpha}")
print(f"Best l1_ratio: {best_elastic_net_model.l1_ratio}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")
# Keep the metrics for the model-comparison table built later.
elastic_score = {
    'MSE': mse,
    'RMSE': rmse,
    'MAE': mae,
    'R2': r2
}
Best Elastic Net Model: Best alpha: 0.001 Best l1_ratio: 0.1 Root Mean Squared Error (RMSE): 0.953 Mean Squared Error: 0.908 Mean Absolute Error: 0.653 R2 Score: 0.978
# Feature importances (coefficient magnitudes) of the tuned Elastic Net model.
elastic_base_feature_importance = plot_feature_importance(best_elastic_net_model,X_train,20)
elastic_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 1.613307 |
| 1 | sma_5 | 1.518359 |
| 2 | macd | 1.493389 |
| 3 | close_3d_avg | 1.290926 |
| 4 | sma_10 | 1.289912 |
| 5 | macd_signal | 1.253588 |
| 6 | adj close_3d_avg | 1.239816 |
| 7 | low_1d_ago | 0.982940 |
| 8 | sma_15 | 0.923173 |
| 9 | high_1d_ago | 0.815800 |
| 10 | close_3d_ago | 0.765736 |
| 11 | open_1d_ago | 0.631358 |
| 12 | sma_30 | 0.586684 |
| 13 | low_3d_avg | 0.580421 |
| 14 | adj close_3d_ago | 0.544949 |
| 15 | open_3d_ago | 0.531747 |
| 16 | rsi | 0.485403 |
| 17 | high_3d_avg | 0.431768 |
| 18 | close_1d_ago | 0.425664 |
| 19 | low_15d_avg | 0.399066 |
# Store the baseline Elastic Net predictions for later comparison.
prediction_df['elastic_pred_base'] = elastic_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 | 54.444972 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 | 54.198628 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 | 54.011290 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 | 53.818335 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 | 54.037049 |
# Keep the 20 most important features from the baseline Elastic Net model.
keep_cols20 = elastic_base_feature_importance[:20]['Feature'].tolist()
X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)
# Train model with the hyperparameters selected by GridSearchCV above
# (best alpha 0.001, best l1_ratio 0.1 — the hard-coded 0.9 previously used
# here did not match the tuned l1_ratio reported by the grid search).
elastic_model20 = ElasticNet(alpha=0.001, l1_ratio=0.1)
elastic_model20.fit(X_train_scaled20, y_train)
# Make predictions on the scaled test set
elastic_pred20 = elastic_model20.predict(X_test_scaled20)
elastic_score20 = evaluate_regression_model(y_test, elastic_pred20)
Mean Squared Error (MSE): 0.948 Root Mean Squared Error (RMSE): 0.974 Mean Absolute Error (MAE): 0.667 R-squared (R2): 0.977
plot_feature_importance(elastic_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 3.518012 |
| 1 | close_3d_avg | 2.975563 |
| 2 | low_15d_avg | 1.164547 |
| 3 | sma_30 | 1.008792 |
| 4 | low_1d_ago | 0.912273 |
| 5 | high_1d_ago | 0.840268 |
| 6 | sma_10 | 0.826143 |
| 7 | adj close_3d_avg | 0.783827 |
| 8 | macd_signal | 0.701467 |
| 9 | macd | 0.690383 |
| 10 | sma_15 | 0.565839 |
| 11 | adj close_3d_ago | 0.556647 |
| 12 | rsi | 0.506954 |
| 13 | close_3d_ago | 0.408292 |
| 14 | open_3d_ago | 0.238611 |
| 15 | high_3d_avg | 0.039417 |
| 16 | low_3d_avg | 0.000000 |
| 17 | sma_5 | 0.000000 |
| 18 | close_1d_ago | 0.000000 |
| 19 | open_1d_ago | 0.000000 |
# Store the reduced-feature Elastic Net predictions for later comparison.
prediction_df['elastic_pred20'] = elastic_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | elastic_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 | 54.444972 | 54.503795 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 | 54.198628 | 54.263269 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 | 54.011290 | 54.055709 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 | 53.818335 | 53.920259 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 | 54.037049 | 54.058664 |
def score_to_df(score, model_name):
    """Return a one-row DataFrame with the metric columns plus a 'Model' label.

    `score` is a dict like {'MSE': ..., 'RMSE': ..., 'MAE': ..., 'R2': ...};
    pd.DataFrame([score]) turns it directly into one row, replacing the old
    keys()/values()/transpose boilerplate repeated for every model.
    """
    row = pd.DataFrame([score])
    row['Model'] = model_name
    return row

# One labelled score frame per fitted model.
ela_df = score_to_df(elastic_score, 'Elastic_Net with All Features')
ela_20_df = score_to_df(elastic_score20, 'Elastic_Net with Top 20 Features')
lasso_df = score_to_df(lasso_score, 'Lasso with All Features')
lasso_20_df = score_to_df(lasso_score20, 'Lasso with Top 20 Features')
ridge_df = score_to_df(ridge_score, 'Ridge with All Features')
ridge_20_df = score_to_df(ridge_score20, 'Ridge with Top 20 Features')
lr_base_df = score_to_df(lr_score_base, 'Linear Reg. with All Features')
lr_20_df = score_to_df(lr_score20, 'Linear Reg. with Top 20 Features')
lr_15_df = score_to_df(lr_score15, 'Linear Reg. with Top 15 Features')
lr_10_df = score_to_df(lr_score10, 'Linear Reg. with Top 10 Features')
# Stack all models and rank them by R2, best first.
df_compare = pd.concat([ela_df,lasso_df,ridge_df,ela_20_df,lasso_20_df,ridge_20_df,
                        lr_base_df,lr_20_df,lr_15_df,lr_10_df]).sort_values(by=['R2'],ascending=False).reset_index(drop=True)
df_compare
| MSE | RMSE | MAE | R2 | Model | |
|---|---|---|---|---|---|
| 0 | 0.729511 | 0.854114 | 0.585601 | 0.982168 | Ridge with Top 20 Features |
| 1 | 0.758611 | 0.870983 | 0.605987 | 0.981457 | Ridge with All Features |
| 2 | 0.768289 | 0.876521 | 0.613346 | 0.98122 | Linear Reg. with Top 20 Features |
| 3 | 0.77558 | 0.88067 | 0.613905 | 0.981042 | Linear Reg. with All Features |
| 4 | 0.786499 | 0.886848 | 0.626022 | 0.980775 | Linear Reg. with Top 10 Features |
| 5 | 0.796893 | 0.892688 | 0.626154 | 0.980521 | Linear Reg. with Top 15 Features |
| 6 | 0.90796 | 0.952869 | 0.652726 | 0.977806 | Elastic_Net with All Features |
| 7 | 0.940304 | 0.969693 | 0.663218 | 0.977015 | Lasso with All Features |
| 8 | 0.948047 | 0.973677 | 0.664517 | 0.976826 | Lasso with Top 20 Features |
| 9 | 0.948242 | 0.973777 | 0.666597 | 0.976821 | Elastic_Net with Top 20 Features |
After retraining the models with different alpha values and input feature subsets, the Ridge regression model with alpha 0.001 and the top 20 features performed best among the others.
MSE measures the average squared difference between predicted and actual values. In this case, the MSE of 0.729511 is relatively low, indicating that, on average, the squared errors between predicted and actual values are small. Lower MSE values suggest better accuracy.
RMSE is the square root of the MSE and provides a measure of the average magnitude of the errors. A lower RMSE (0.854114) signifies that, on average, the model's predictions are close to the actual values. It is in the same unit as the target variable.
MAE measures the average absolute difference between predicted and actual values. With an MAE of 0.585601, the model's predictions, on average, deviate by approximately 0.59 units from the actual values. Lower MAE values indicate better accuracy.
R2 represents the proportion of variance in the target variable that is predictable from the independent variables. An R2 value of 0.982168 is exceptionally high, indicating that the model explains about 98.21% of the variance in the closing stock prices. A higher R2 value suggests a better accuracy.
In summary, the provided accuracy scores collectively suggest that the model performs exceptionally well. The low MSE, RMSE, MAE and high R2 score indicate that the model's predictions are close to the actual values.
prediction_df
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | elastic_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 | 54.444972 | 54.503795 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 | 54.198628 | 54.263269 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 | 54.011290 | 54.055709 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 | 53.818335 | 53.920259 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 | 54.037049 | 54.058664 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2694 | 2023-11-01 | 67.970001 | 67.251327 | 66.691627 | 66.293326 | 66.511564 | 67.298025 | 66.970336 | 66.962254 | 67.016646 | 67.047164 | 67.123766 |
| 2695 | 2023-11-02 | 68.820000 | 68.295223 | 67.635666 | 67.398789 | 67.681439 | 68.052648 | 67.865085 | 67.502858 | 67.485406 | 67.584452 | 67.672327 |
| 2696 | 2023-11-03 | 68.239998 | 68.864264 | 68.759090 | 68.689217 | 68.943305 | 68.811739 | 68.806564 | 68.155329 | 68.270610 | 68.171208 | 68.378378 |
| 2697 | 2023-11-06 | 68.489998 | 68.041446 | 68.134383 | 68.593341 | 68.842344 | 68.125227 | 68.286008 | 68.186065 | 68.384744 | 68.222377 | 68.472590 |
| 2698 | 2023-11-07 | 69.019997 | 68.239220 | 68.843554 | 69.136339 | 69.178896 | 68.336189 | 68.670691 | 68.368078 | 68.539077 | 68.463381 | 68.531877 |
970 rows × 12 columns
# Actual close price vs. every model's predictions over the test period.
# A loop over the column names replaces eleven duplicated lineplot calls.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'lr_pred_base', 'lr_pred20', 'lr_pred15', 'lr_pred10',
            'ridge_pred_base', 'ridge_pred20', 'lasso_pred_base', 'lasso_pred20',
            'elastic_pred_base', 'elastic_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actual close price vs. the four Linear Regression variants.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'lr_pred_base', 'lr_pred20', 'lr_pred15', 'lr_pred10']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actual close price vs. both Ridge variants.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'ridge_pred_base', 'ridge_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actual close price vs. both Lasso variants.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'lasso_pred_base', 'lasso_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actual close price vs. both Elastic Net variants.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'elastic_pred_base', 'elastic_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Rebuild the train/test matrices; the target is the next day's close price.
# drop() now uses the `columns=` keyword: the positional axis argument
# (`.drop(cols, 1)`) was deprecated and removed in pandas 2.0.
y_train = train_df['close_1d_next'].copy()
X_train = train_df.drop(columns=['close_1d_next'])
y_test = test_df['close_1d_next'].copy()
X_test = test_df.drop(columns=['close_1d_next'])
# Restrict both splits to the top-20 features selected by the baseline Ridge model.
ridge_20_features = ridge_base_feature_importance[:20]['Feature'].tolist()
X_train = X_train[ridge_20_features]
X_test = X_test[ridge_20_features]
def train_ridge_regression(X_train, X_test, y_train, y_test, alpha=0.001):
    """Scale the features, fit a Ridge model, and score it on the test split.

    Parameters
    ----------
    X_train, X_test : pd.DataFrame
        Feature matrices for the train and test splits.
    y_train, y_test : pd.Series
        Next-day close-price targets aligned with the splits.
    alpha : float, default 0.001
        Ridge regularization strength; the default matches the value tuned
        earlier in this notebook, so existing callers are unaffected.

    Returns
    -------
    tuple
        (fitted Ridge model, test-set predictions, score dict from
        evaluate_regression_model2).
    """
    # Fit the scaler on the training data only to avoid test-set leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Train model
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train_scaled, y_train)
    # Make predictions on the scaled test set
    ridge_pred = ridge_model.predict(X_test_scaled)
    ridge_score = evaluate_regression_model2(y_test, ridge_pred)
    return ridge_model, ridge_pred, ridge_score
# Train and evaluate the final Ridge model on the top-20 feature split.
ridge_model, ridge_pred, ridge_score = train_ridge_regression(X_train,X_test,y_train,y_test)
ridge_score
{'MSE': 0.7295114881722916,
'RMSE': 0.8541144467647714,
'MAE': 0.5856010765186319,
'R2': 0.9821678541358965}
ridge_pred[:15]
array([54.34934416, 54.38159609, 54.23318706, 54.03650291, 54.05964943,
54.17549586, 54.25017275, 54.54902816, 54.18781481, 54.74202716,
55.14666847, 55.18770245, 55.53205975, 55.4774224 , 55.51006433])
# Accuracy diagnostics, prediction-vs-actual time series, and the
# coefficient-based feature importances of the final Ridge model.
plot_regression_accuracy(y_test, ridge_pred)
plot_predictions(df,ridge_pred)
plot_feature_importance(ridge_model,X_train,20)
| Feature | Importance | |
|---|---|---|
| 0 | sma_5 | 25.441510 |
| 1 | close_5d_avg | 22.435160 |
| 2 | adj close_5d_avg | 11.490239 |
| 3 | adj close_1d_ago | 5.671591 |
| 4 | adj close_10d_avg | 5.507894 |
| 5 | close_5d_ago | 5.147037 |
| 6 | close_1d_ago | 4.554521 |
| 7 | close_10d_avg | 4.509604 |
| 8 | close_15d_avg | 3.719596 |
| 9 | low_5d_avg | 3.022988 |
| 10 | sma_15 | 2.915010 |
| 11 | ema_9 | 2.899160 |
| 12 | low_10d_avg | 1.985008 |
| 13 | high_5d_avg | 1.745675 |
| 14 | open_10d_avg | 1.590243 |
| 15 | open_5d_avg | 1.561036 |
| 16 | open_15d_avg | 1.264441 |
| 17 | close_3d_ago | 0.798379 |
| 18 | close_7d_avg | 0.640924 |
| 19 | high_30d_avg | 0.200250 |
The residual, scatter, and time series line charts above clearly show that the predicted values are very close to the actual values. These visualizations confirm that the model is very good at making accurate predictions, highlighting its strong performance and reliability in understanding the details of the data.
df_all = pd.read_parquet(out_loc+"stock_1d.parquet")
df_all.columns = df_all.columns.str.lower()
### keep stocks in data with min year 2013, max year 2023
# One named-aggregation groupby replaces three separate groupbys plus two
# merges: per symbol we need the first date, last date, and trading-day count.
stock_cnt = (df_all.groupby('symbol')['date']
             .agg(min_date='min', max_date='max', days_cnt='count')
             .reset_index())
stock_cnt['min_year'] = stock_cnt['min_date'].dt.year
stock_cnt['max_year'] = stock_cnt['max_date'].dt.year
# A stock qualifies if its history spans 2013-2023 with >= 2500 trading days.
keep_stocks = stock_cnt[(stock_cnt['min_year']==2013)&(stock_cnt['max_year']==2023)&(stock_cnt['days_cnt']>=2500)]['symbol'].unique().tolist()
stock_cnt.head()
| symbol | min_date | max_date | days_cnt | min_year | max_year | |
|---|---|---|---|---|---|---|
| 0 | A | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 1 | AAL | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 2 | AAPL | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 3 | ABBV | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 4 | ABNB | 2020-12-10 | 2023-11-08 | 733 | 2020 | 2023 |
# Restrict to 2023 rows for the qualifying tickers.
df_2023 = df_all[(df_all.date.dt.year==2023) & (df_all.symbol.isin(keep_stocks))]
# Total 2023 trading volume per stock, most-traded first.
volume_2023 = (df_2023
               .groupby(['symbol', 'security', 'gics sector'], as_index=False)['volume']
               .sum()
               .sort_values(by='volume', ascending=False)
               .reset_index(drop=True))
volume_2023.head()
| symbol | security | gics sector | volume | |
|---|---|---|---|---|
| 0 | TSLA | Tesla, Inc. | Consumer Discretionary | 3.009291e+10 |
| 1 | AMD | AMD | Information Technology | 1.342035e+10 |
| 2 | AMZN | Amazon | Consumer Discretionary | 1.305160e+10 |
| 3 | AAPL | Apple Inc. | Information Technology | 1.303964e+10 |
| 4 | F | Ford Motor Company | Consumer Discretionary | 1.278319e+10 |
# Total 2023 trading volume per GICS sector, largest first.
sector_2023 = (df_2023
               .groupby(['gics sector'], as_index=False)['volume']
               .sum()
               .sort_values(by='volume', ascending=False)
               .reset_index(drop=True))
sector_2023
| gics sector | volume | |
|---|---|---|
| 0 | Consumer Discretionary | 9.171407e+10 |
| 1 | Information Technology | 8.888840e+10 |
| 2 | Financials | 6.728113e+10 |
| 3 | Communication Services | 5.267892e+10 |
| 4 | Health Care | 3.755560e+10 |
| 5 | Industrials | 3.672492e+10 |
| 6 | Energy | 3.245171e+10 |
| 7 | Consumer Staples | 2.824873e+10 |
| 8 | Utilities | 2.214882e+10 |
| 9 | Materials | 1.432867e+10 |
| 10 | Real Estate | 1.318748e+10 |
# filter top 5 sectors with highest volume in 2023
sector_list = sector_2023[:5]['gics sector'].tolist()
num_stocks = 5
# Take the `num_stocks` highest-volume stocks from each of those sectors.
# extend() flattens as we go, so the separate append-then-flatten pass the
# original code used is unnecessary.
stock_list = []
for sec in sector_list:
    stock_list.extend(volume_2023[volume_2023['gics sector']==sec]['symbol'][:num_stocks].tolist())
len(stock_list)
25
# Keep the full daily history for the selected tickers.
df_stocks = df_all[df_all['symbol'].isin(stock_list)].reset_index(drop=True)
df_stocks.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-01-02 | 18.003504 | 18.193193 | 17.931683 | 18.099348 | 18.099348 | 101550348.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 1 | 2013-01-03 | 18.141392 | 18.316566 | 18.036036 | 18.109859 | 18.109859 | 92635272.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 2 | 2013-01-04 | 18.251753 | 18.555305 | 18.210211 | 18.467718 | 18.467718 | 110429460.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 3 | 2013-01-07 | 18.404655 | 18.503002 | 18.282784 | 18.387136 | 18.387136 | 66161772.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 4 | 2013-01-08 | 18.406906 | 18.425926 | 18.128880 | 18.350851 | 18.350851 | 66976956.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
def preprocess_data(df):
    """Add technical indicators, the next-day close target, and lag/rolling features.

    NOTE(review): mutates `df` in place (and also returns it). When called on
    a slice of another DataFrame this can trigger SettingWithCopyWarning —
    callers should pass a copy. Relies on helpers defined earlier in the file
    (add_moving_averages, rsi, mfi, add_lagged_features,
    add_rolling_avg_features).
    """
    # Simple/exponential moving-average columns over the close price.
    add_moving_averages(df, 'close')
    # Momentum and money-flow indicators.
    df['rsi'] = rsi(df)
    df['mfi'] = mfi(df, 14)
    # MACD: 12-period EMA minus 26-period EMA, with a 9-period signal line.
    df['macd'] = df['close'].ewm(span=12, min_periods=12).mean() - df['close'].ewm(span=26, min_periods=26).mean()
    df['macd_signal'] = df['macd'].ewm(span=9, min_periods=9).mean()
    # Prediction target: the next trading day's close.
    df['close_1d_next'] = df['close'].shift(-1)
    # Define lag periods and rolling window sizes
    lag_periods = [1, 3, 5, 7, 14, 21, 28]
    rolling_windows = [3, 5, 7, 10, 15, 30]
    # Columns to create features for
    columns = ['close', 'adj close', 'open', 'high', 'low', 'volume']
    # Add lagged and rolling average features for each column
    for column in columns:
        add_lagged_features(df, column, lag_periods)
        add_rolling_avg_features(df, column, rolling_windows)
    #df = df.dropna().reset_index(drop=True)
    return df
# Train and evaluate a per-stock Ridge model, collecting test-set metrics for comparison.
stock_compare = []
# Raw price/metadata columns to discard before modeling (loop-invariant, so hoisted).
drop_cols1 = ['date', 'open', 'high', 'low', 'close', 'adj close', 'volume', 'symbol', 'security',
              'gics sector', 'gics sub-industry', 'headquarters location', 'date added', 'cik', 'founded']
for stock in stock_list:
    stock_data = df_stocks[df_stocks['symbol'] == stock]
    stock_data = preprocess_data(stock_data)
    stock_data = stock_data.dropna().reset_index(drop=True)
    # Time-based split: train on pre-2020 data, test on 2020 onward (no shuffling,
    # so no look-ahead leakage across the split boundary).
    train_df_temp = stock_data[stock_data.date.dt.year < 2020]
    test_df_temp = stock_data[stock_data.date.dt.year >= 2020]
    # Use the explicit `columns=` keyword: the positional axis argument to
    # DataFrame.drop was removed in pandas 2.0 and raises a TypeError there.
    train_df_temp = train_df_temp.drop(columns=drop_cols1)
    test_df_temp = test_df_temp.drop(columns=drop_cols1)
    # Target column is next day's close price.
    y_train_temp = train_df_temp['close_1d_next'].copy()
    X_train_temp = train_df_temp.drop(columns=['close_1d_next'])
    y_test_temp = test_df_temp['close_1d_next'].copy()
    X_test_temp = test_df_temp.drop(columns=['close_1d_next'])
    # Restrict features to the top-20 set previously selected for Ridge.
    X_train_temp = X_train_temp[ridge_20_features]
    X_test_temp = X_test_temp[ridge_20_features]
    temp_model, temp_pred, temp_score = train_ridge_regression(
        X_train_temp, X_test_temp, y_train_temp, y_test_temp)
    # Convert the metrics dict directly into a one-row DataFrame tagged with the ticker.
    score_df = pd.DataFrame([temp_score])
    score_df['symbol'] = stock
    stock_compare.append(score_df)
compare_df = pd.concat(stock_compare).sort_values(by='R2', ascending=False).reset_index(drop=True)
compare_df
| MSE | RMSE | MAE | R2 | symbol | |
|---|---|---|---|---|---|
| 0 | 53.880829 | 7.340356 | 4.993183 | 0.995324 | NVDA |
| 1 | 0.492338 | 0.701668 | 0.488305 | 0.994058 | VZ |
| 2 | 7.288854 | 2.699788 | 2.009309 | 0.993042 | AAPL |
| 3 | 4.806856 | 2.192454 | 1.600387 | 0.992472 | GOOG |
| 4 | 4.773841 | 2.184912 | 1.587615 | 0.992342 | GOOGL |
| 5 | 44.497002 | 6.670607 | 4.551507 | 0.990978 | META |
| 6 | 1.977385 | 1.406195 | 0.998546 | 0.99076 | CVS |
| 7 | 24.89376 | 4.989365 | 3.774 | 0.990518 | MSFT |
| 8 | 1.212324 | 1.101056 | 0.819318 | 0.990392 | GM |
| 9 | 75.230542 | 8.673554 | 6.092839 | 0.990036 | TSLA |
| 10 | 0.153693 | 0.392037 | 0.279125 | 0.989988 | F |
| 11 | 0.5373 | 0.733008 | 0.545867 | 0.989299 | BAC |
| 12 | 0.568511 | 0.753996 | 0.543832 | 0.988847 | PFE |
| 13 | 0.245541 | 0.495521 | 0.359251 | 0.988802 | KEY |
| 14 | 1.632282 | 1.277608 | 0.868622 | 0.988481 | INTC |
| 15 | 0.802889 | 0.896041 | 0.613263 | 0.987932 | CCL |
| 16 | 0.940589 | 0.96984 | 0.7127 | 0.987702 | WFC |
| 17 | 10.046478 | 3.169618 | 2.318726 | 0.987042 | AMZN |
| 18 | 0.151731 | 0.389527 | 0.254337 | 0.985373 | T |
| 19 | 2.091562 | 1.446223 | 1.031672 | 0.984222 | C |
| 20 | 9.521865 | 3.085752 | 2.267984 | 0.98374 | AMD |
| 21 | 0.111184 | 0.333443 | 0.243588 | 0.981839 | HBAN |
| 22 | 0.791798 | 0.889831 | 0.641554 | 0.98043 | BMY |
| 23 | 4.116423 | 2.028897 | 1.434524 | 0.969971 | JNJ |
| 24 | 1.319744 | 1.148801 | 0.902782 | 0.866884 | VTRS |
The final phase of the project involved applying the developed model to real-world scenarios. By identifying the top 5 industries with the highest volume in 2023, we ensured that our predictions were grounded in current market dynamics. The subsequent selection of 5 stocks within each industry added a layer of practicality to our findings.
The model's strong performance on NVDA, AAPL, VZ, GOOG, and GOOGL (test-set R² above 0.99 for each) suggests it generalizes well across diverse market conditions. Simultaneously, the noticeably weaker result for VTRS (R² ≈ 0.87) opened up opportunities for further investigation into the factors contributing to its underperformance.